From 5b710d7dc1900b4343f2bd99b7cf432bb955668c Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Wed, 1 Mar 2023 16:47:16 -0800
Subject: [PATCH 01/17] Remove some params.

---
 cpp/include/cudf/detail/copy.hpp         |  8 ++---
 cpp/include/cudf/detail/gather.cuh       | 15 +++++-----
 cpp/include/cudf/detail/hashing.hpp      | 38 +++++++++++-------------
 cpp/include/cudf/lists/detail/gather.cuh | 32 +++++++++-----------
 cpp/tests/copying/detail_gather_tests.cu | 16 +++++-----
 5 files changed, 50 insertions(+), 59 deletions(-)
diff --git a/cpp/include/cudf/detail/copy.hpp b/cpp/include/cudf/detail/copy.hpp
index 8c3f315284d..c144f27cc00 100644
--- a/cpp/include/cudf/detail/copy.hpp
+++ b/cpp/include/cudf/detail/copy.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2018-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -285,9 +285,9 @@ std::unique_ptr<column> copy_if_else(
 std::unique_ptr<table> sample(
   table_view const& input,
   size_type const n,
-  sample_with_replacement replacement = sample_with_replacement::FALSE,
-  int64_t const seed                  = 0,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  sample_with_replacement replacement,
+  int64_t const seed,
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
diff --git a/cpp/include/cudf/detail/gather.cuh b/cpp/include/cudf/detail/gather.cuh
index 57d834e6277..ac2865c05c5 100644
--- a/cpp/include/cudf/detail/gather.cuh
+++ b/cpp/include/cudf/detail/gather.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -647,13 +647,12 @@ void gather_bitmask(table_view const& source,
  * @return cudf::table Result of the gather
  */
 template <typename MapIterator>
-std::unique_ptr<table> gather(
-  table_view const& source_table,
-  MapIterator gather_map_begin,
-  MapIterator gather_map_end,
-  out_of_bounds_policy bounds_policy  = out_of_bounds_policy::DONT_CHECK,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<table> gather(table_view const& source_table,
+                              MapIterator gather_map_begin,
+                              MapIterator gather_map_end,
+                              out_of_bounds_policy bounds_policy,
+                              rmm::cuda_stream_view stream,
+                              rmm::mr::device_memory_resource* mr)
 {
   std::vector<std::unique_ptr<column>> destination_columns;
 
diff --git a/cpp/include/cudf/detail/hashing.hpp b/cpp/include/cudf/detail/hashing.hpp
index b7469d80a8d..771b3e150ec 100644
--- a/cpp/include/cudf/detail/hashing.hpp
+++ b/cpp/include/cudf/detail/hashing.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -31,29 +31,25 @@ namespace detail {
  *
  * @param stream CUDA stream used for device memory operations and kernel launches.
  */
-std::unique_ptr<column> hash(
-  table_view const& input,
-  hash_id hash_function               = hash_id::HASH_MURMUR3,
-  uint32_t seed                       = cudf::DEFAULT_HASH_SEED,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> hash(table_view const& input,
+                             hash_id hash_function,
+                             uint32_t seed,
+                             rmm::cuda_stream_view stream,
+                             rmm::mr::device_memory_resource* mr);
 
-std::unique_ptr<column> murmur_hash3_32(
-  table_view const& input,
-  uint32_t seed                       = cudf::DEFAULT_HASH_SEED,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> murmur_hash3_32(table_view const& input,
+                                        uint32_t seed,
+                                        rmm::cuda_stream_view,
+                                        rmm::mr::device_memory_resource* mr);
 
-std::unique_ptr<column> spark_murmur_hash3_32(
-  table_view const& input,
-  uint32_t seed                       = cudf::DEFAULT_HASH_SEED,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> spark_murmur_hash3_32(table_view const& input,
+                                              uint32_t seed,
+                                              rmm::cuda_stream_view,
+                                              rmm::mr::device_memory_resource* mr);
 
-std::unique_ptr<column> md5_hash(
-  table_view const& input,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> md5_hash(table_view const& input,
+                                 rmm::cuda_stream_view stream,
+                                 rmm::mr::device_memory_resource* mr);
 
 /* Copyright 2005-2014 Daniel James.
  *
diff --git a/cpp/include/cudf/lists/detail/gather.cuh b/cpp/include/cudf/lists/detail/gather.cuh
index 48c0ed8f6e9..83710a49f6a 100644
--- a/cpp/include/cudf/lists/detail/gather.cuh
+++ b/cpp/include/cudf/lists/detail/gather.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -285,11 +285,10 @@ gather_data make_gather_data(cudf::lists_column_view const& source_column,
  *
  * @returns column with elements gathered based on `gather_data`
  */
-std::unique_ptr<column> gather_list_nested(
-  lists_column_view const& list,
-  gather_data& gd,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> gather_list_nested(lists_column_view const& list,
+                                           gather_data& gd,
+                                           rmm::cuda_stream_view stream,
+                                           rmm::mr::device_memory_resource* mr);
 
 /**
  * @brief Gather a leaf column from a hierarchy of list columns.
@@ -303,11 +302,10 @@ std::unique_ptr<column> gather_list_nested(
  *
  * @returns column with elements gathered based on `gather_data`
  */
-std::unique_ptr<column> gather_list_leaf(
-  column_view const& column,
-  gather_data const& gd,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> gather_list_leaf(column_view const& column,
+                                         gather_data const& gd,
+                                         rmm::cuda_stream_view stream,
+                                         rmm::mr::device_memory_resource* mr);
 
 /**
  * @copydoc cudf::lists::segmented_gather(lists_column_view const& source_column,
@@ -317,13 +315,11 @@ std::unique_ptr<column> gather_list_leaf(
  *
  * @param stream CUDA stream on which to execute kernels
  */
-std::unique_ptr<column> segmented_gather(
-  lists_column_view const& source_column,
-  lists_column_view const& gather_map_list,
-  out_of_bounds_policy bounds_policy = out_of_bounds_policy::DONT_CHECK,
-  // Move before bounds_policy?
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> segmented_gather(lists_column_view const& source_column,
+                                         lists_column_view const& gather_map_list,
+                                         out_of_bounds_policy bounds_policy,
+                                         rmm::cuda_stream_view stream,
+                                         rmm::mr::device_memory_resource* mr);
 
 }  // namespace detail
 }  // namespace lists
diff --git a/cpp/tests/copying/detail_gather_tests.cu b/cpp/tests/copying/detail_gather_tests.cu
index bf2937ae8ab..d4898113afb 100644
--- a/cpp/tests/copying/detail_gather_tests.cu
+++ b/cpp/tests/copying/detail_gather_tests.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -46,9 +46,11 @@ TYPED_TEST_SUITE(GatherTest, cudf::test::NumericTypes);
 TYPED_TEST(GatherTest, GatherDetailDeviceVectorTest)
 {
   constexpr cudf::size_type source_size{1000};
-  rmm::device_uvector<cudf::size_type> gather_map(source_size, cudf::get_default_stream());
-  thrust::sequence(
-    rmm::exec_policy_nosync(cudf::get_default_stream()), gather_map.begin(), gather_map.end());
+  rmm::device_uvector<cudf::size_type> gather_map_vec(source_size, cudf::get_default_stream());
+  auto gather_map = std::make_unique<cudf::column>(std::move(gather_map_vec));
+  thrust::sequence(rmm::exec_policy_nosync(cudf::get_default_stream()),
+                   gather_map->mutable_view().begin<cudf::size_type>(),
+                   gather_map->mutable_view().end<cudf::size_type>());
 
   auto data = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i; });
   cudf::test::fixed_width_column_wrapper<TypeParam> source_column(data, data + source_size);
@@ -57,8 +59,7 @@ TYPED_TEST(GatherTest, GatherDetailDeviceVectorTest)
 
   // test with device vector iterators
   {
-    std::unique_ptr<cudf::table> result =
-      cudf::detail::gather(source_table, gather_map.begin(), gather_map.end());
+    std::unique_ptr<cudf::table> result = cudf::gather(source_table, gather_map->view());
 
     for (auto i = 0; i < source_table.num_columns(); ++i) {
       CUDF_TEST_EXPECT_COLUMNS_EQUAL(source_table.column(i), result->view().column(i));
@@ -69,8 +70,7 @@ TYPED_TEST(GatherTest, GatherDetailDeviceVectorTest)
 
   // test with raw pointers
   {
-    std::unique_ptr<cudf::table> result =
-      cudf::detail::gather(source_table, gather_map.data(), gather_map.data() + gather_map.size());
+    std::unique_ptr<cudf::table> result = cudf::gather(source_table, gather_map->view());
 
     for (auto i = 0; i < source_table.num_columns(); ++i) {
       CUDF_TEST_EXPECT_COLUMNS_EQUAL(source_table.column(i), result->view().column(i));

From a26045c97cb57bd3ae021db6ee4a61753ea80506 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Wed, 1 Mar 2023 17:00:48 -0800
Subject: [PATCH 02/17] copy APIs.

---
 cpp/include/cudf/detail/copy.hpp     | 119 ++++++++++++---------------
 cpp/src/copying/scatter.cu           |   4 +-
 cpp/src/sort/segmented_sort_impl.cuh |  23 ++++--
 3 files changed, 71 insertions(+), 75 deletions(-)

diff --git a/cpp/include/cudf/detail/copy.hpp b/cpp/include/cudf/detail/copy.hpp
index c144f27cc00..83395f8fa90 100644
--- a/cpp/include/cudf/detail/copy.hpp
+++ b/cpp/include/cudf/detail/copy.hpp
@@ -144,12 +144,11 @@ std::vector<table_view> split(table_view const& input,
  *
  * @param stream CUDA stream used for device memory operations and kernel launches.
  */
-std::unique_ptr<column> shift(
-  column_view const& input,
-  size_type offset,
-  scalar const& fill_value,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> shift(column_view const& input,
+                              size_type offset,
+                              scalar const& fill_value,
+                              rmm::cuda_stream_view stream,
+                              rmm::mr::device_memory_resource* mr);
 
 /**
  * @brief Performs segmented shifts for specified values.
@@ -184,24 +183,22 @@ std::unique_ptr<column> shift(
  *
  * @note If `offset == 0`, a copy of @p segmented_values is returned.
  */
-std::unique_ptr<column> segmented_shift(
-  column_view const& segmented_values,
-  device_span<size_type const> segment_offsets,
-  size_type offset,
-  scalar const& fill_value,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> segmented_shift(column_view const& segmented_values,
+                                        device_span<size_type const> segment_offsets,
+                                        size_type offset,
+                                        scalar const& fill_value,
+                                        rmm::cuda_stream_view stream,
+                                        rmm::mr::device_memory_resource* mr);
 
 /**
  * @copydoc cudf::contiguous_split
  *
  * @param stream CUDA stream used for device memory operations and kernel launches.
  **/
-std::vector<packed_table> contiguous_split(
-  cudf::table_view const& input,
-  std::vector<size_type> const& splits,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::vector<packed_table> contiguous_split(cudf::table_view const& input,
+                                           std::vector<size_type> const& splits,
+                                           rmm::cuda_stream_view stream,
+                                           rmm::mr::device_memory_resource* mr);
 
 /**
  * @copydoc cudf::pack
@@ -210,7 +207,7 @@ std::vector<packed_table> contiguous_split(
  **/
 packed_columns pack(cudf::table_view const& input,
                     rmm::cuda_stream_view stream,
-                    rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+                    rmm::mr::device_memory_resource* mr);
 
 /**
  * @copydoc cudf::allocate_like(column_view const&, size_type, mask_allocation_policy,
@@ -218,12 +215,11 @@ packed_columns pack(cudf::table_view const& input,
  *
  * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  */
-std::unique_ptr<column> allocate_like(
-  column_view const& input,
-  size_type size,
-  mask_allocation_policy mask_alloc   = mask_allocation_policy::RETAIN,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> allocate_like(column_view const& input,
+                                      size_type size,
+                                      mask_allocation_policy mask_alloc,
+                                      rmm::cuda_stream_view stream,
+                                      rmm::mr::device_memory_resource* mr);
 
 /**
  * @copydoc cudf::copy_if_else( column_view const&, column_view const&,
@@ -231,12 +227,11 @@ std::unique_ptr<column> allocate_like(
  *
  * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  */
-std::unique_ptr<column> copy_if_else(
-  column_view const& lhs,
-  column_view const& rhs,
-  column_view const& boolean_mask,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> copy_if_else(column_view const& lhs,
+                                     column_view const& rhs,
+                                     column_view const& boolean_mask,
+                                     rmm::cuda_stream_view stream,
+                                     rmm::mr::device_memory_resource* mr);
 
 /**
  * @copydoc cudf::copy_if_else( scalar const&, column_view const&,
@@ -244,12 +239,11 @@ std::unique_ptr<column> copy_if_else(
  *
  * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  */
-std::unique_ptr<column> copy_if_else(
-  scalar const& lhs,
-  column_view const& rhs,
-  column_view const& boolean_mask,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> copy_if_else(scalar const& lhs,
+                                     column_view const& rhs,
+                                     column_view const& boolean_mask,
+                                     rmm::cuda_stream_view stream,
+                                     rmm::mr::device_memory_resource* mr);
 
 /**
  * @copydoc cudf::copy_if_else( column_view const&, scalar const&,
@@ -257,12 +251,11 @@ std::unique_ptr<column> copy_if_else(
  *
  * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  */
-std::unique_ptr<column> copy_if_else(
-  column_view const& lhs,
-  scalar const& rhs,
-  column_view const& boolean_mask,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> copy_if_else(column_view const& lhs,
+                                     scalar const& rhs,
+                                     column_view const& boolean_mask,
+                                     rmm::cuda_stream_view stream,
+                                     rmm::mr::device_memory_resource* mr);
 
 /**
  * @copydoc cudf::copy_if_else( scalar const&, scalar const&,
@@ -270,36 +263,33 @@ std::unique_ptr<column> copy_if_else(
  *
  * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  */
-std::unique_ptr<column> copy_if_else(
-  scalar const& lhs,
-  scalar const& rhs,
-  column_view const& boolean_mask,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> copy_if_else(scalar const& lhs,
+                                     scalar const& rhs,
+                                     column_view const& boolean_mask,
+                                     rmm::cuda_stream_view stream,
+                                     rmm::mr::device_memory_resource* mr);
 
 /**
  * @copydoc cudf::sample
  *
  * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  */
-std::unique_ptr<table> sample(
-  table_view const& input,
-  size_type const n,
-  sample_with_replacement replacement,
-  int64_t const seed,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<table> sample(table_view const& input,
+                              size_type const n,
+                              sample_with_replacement replacement,
+                              int64_t const seed,
+                              rmm::cuda_stream_view stream,
+                              rmm::mr::device_memory_resource* mr);
 
 /**
  * @copydoc cudf::get_element
  *
  * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  */
-std::unique_ptr<scalar> get_element(
-  column_view const& input,
-  size_type index,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<scalar> get_element(column_view const& input,
+                                    size_type index,
+                                    rmm::cuda_stream_view stream,
+                                    rmm::mr::device_memory_resource* mr);
 
 /**
  * @copydoc cudf::has_nonempty_nulls
@@ -320,10 +310,9 @@ bool may_have_nonempty_nulls(column_view const& input, rmm::cuda_stream_view str
  *
  * @param stream CUDA stream used for device memory operations and kernel launches.
  */
-std::unique_ptr<column> purge_nonempty_nulls(
-  column_view const& input,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> purge_nonempty_nulls(column_view const& input,
+                                             rmm::cuda_stream_view stream,
+                                             rmm::mr::device_memory_resource* mr);
 
 }  // namespace detail
 }  // namespace cudf
diff --git a/cpp/src/copying/scatter.cu b/cpp/src/copying/scatter.cu
index dd4912a216e..95248f0d98e 100644
--- a/cpp/src/copying/scatter.cu
+++ b/cpp/src/copying/scatter.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -253,7 +253,7 @@ struct column_scalar_scatterer_impl<struct_view, MapIterator> {
 
     auto scatter_functor   = column_scalar_scatterer<decltype(scatter_iter)>{};
     auto fields_iter_begin = make_counting_transform_iterator(0, [&](auto const& i) {
-      auto row_slr = get_element(typed_s->view().column(i), 0, stream);
+      auto row_slr = get_element(typed_s->view().column(i), 0, stream, mr);
       return type_dispatcher<dispatch_storage_type>(row_slr->type(),
                                                     scatter_functor,
                                                     *row_slr,
diff --git a/cpp/src/sort/segmented_sort_impl.cuh b/cpp/src/sort/segmented_sort_impl.cuh
index a32382b840f..8c1d57f79bc 100644
--- a/cpp/src/sort/segmented_sort_impl.cuh
+++ b/cpp/src/sort/segmented_sort_impl.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -68,12 +68,13 @@ struct column_fast_sort_fn {
                  column_view const& segment_offsets,
                  mutable_column_view& indices,
                  bool ascending,
-                 rmm::cuda_stream_view stream)
+                 rmm::cuda_stream_view stream,
+                 rmm::mr::device_memory_resource* mr)
   {
     // CUB's segmented sort functions cannot accept iterators.
     // We create a temporary column here for it to use.
     auto temp_col =
-      cudf::detail::allocate_like(input, input.size(), mask_allocation_policy::NEVER, stream);
+      cudf::detail::allocate_like(input, input.size(), mask_allocation_policy::NEVER, stream, mr);
     mutable_column_view output_view = temp_col->mutable_view();
 
     // DeviceSegmentedSort is faster than DeviceSegmentedRadixSort at this time
@@ -128,14 +129,19 @@ struct column_fast_sort_fn {
                   column_view const& segment_offsets,
                   mutable_column_view& indices,
                   bool ascending,
-                  rmm::cuda_stream_view stream)
+                  rmm::cuda_stream_view stream,
+                  rmm::mr::device_memory_resource* mr)
   {
-    fast_sort<T>(input, segment_offsets, indices, ascending, stream);
+    fast_sort<T>(input, segment_offsets, indices, ascending, stream, mr);
   }
 
   template <typename T, CUDF_ENABLE_IF(!is_fast_sort_supported<T>())>
-  void operator()(
-    column_view const&, column_view const&, mutable_column_view&, bool, rmm::cuda_stream_view)
+  void operator()(column_view const&,
+                  column_view const&,
+                  mutable_column_view&,
+                  bool,
+                  rmm::cuda_stream_view,
+                  rmm::mr::device_memory_resource*)
   {
     CUDF_FAIL("Column type cannot be used with fast-sort function");
   }
@@ -171,7 +177,8 @@ std::unique_ptr<column> fast_segmented_sorted_order(column_view const& input,
                                                segment_offsets,
                                                indices_view,
                                                column_order == order::ASCENDING,
-                                               stream);
+                                               stream,
+                                               mr);
   return sorted_indices;
 }
 

From fd5c0e9d1def5824824cff77ecf019dd62afcb87 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Wed, 1 Mar 2023 17:09:44 -0800
Subject: [PATCH 03/17] Update arrow calls.

---
 cpp/include/cudf/detail/interop.hpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/cpp/include/cudf/detail/interop.hpp b/cpp/include/cudf/detail/interop.hpp
index 25ce5b09eb8..452144da167 100644
--- a/cpp/include/cudf/detail/interop.hpp
+++ b/cpp/include/cudf/detail/interop.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -111,9 +111,9 @@ data_type arrow_to_cudf_type(arrow::DataType const& arrow_type);
  * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::shared_ptr<arrow::Table> to_arrow(table_view input,
-                                       std::vector<column_metadata> const& metadata = {},
-                                       rmm::cuda_stream_view stream = cudf::get_default_stream(),
-                                       arrow::MemoryPool* ar_mr     = arrow::default_memory_pool());
+                                       std::vector<column_metadata> const& metadata,
+                                       rmm::cuda_stream_view stream,
+                                       arrow::MemoryPool* ar_mr);
 
 /**
  * @copydoc cudf::arrow_to_cudf

From 2d4c23fb398f7d656c7f70a23e9fde7c90a0a2e0 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Wed, 1 Mar 2023 17:19:30 -0800
Subject: [PATCH 04/17] stream_compaction.

---
 cpp/include/cudf/detail/stream_compaction.hpp | 89 +++++++++----------
 cpp/src/copying/scatter.cu                    |  2 +-
 cpp/src/stream_compaction/distinct.cu         |  2 +-
 cpp/src/stream_compaction/stable_distinct.cu  |  4 +-
 4 files changed, 45 insertions(+), 52 deletions(-)

diff --git a/cpp/include/cudf/detail/stream_compaction.hpp b/cpp/include/cudf/detail/stream_compaction.hpp
index e725718ed22..e6cf9bb2c50 100644
--- a/cpp/include/cudf/detail/stream_compaction.hpp
+++ b/cpp/include/cudf/detail/stream_compaction.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -32,12 +32,11 @@ namespace detail {
  *
  * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  */
-std::unique_ptr<table> drop_nulls(
-  table_view const& input,
-  std::vector<size_type> const& keys,
-  cudf::size_type keep_threshold,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<table> drop_nulls(table_view const& input,
+                                  std::vector<size_type> const& keys,
+                                  cudf::size_type keep_threshold,
+                                  rmm::cuda_stream_view stream,
+                                  rmm::mr::device_memory_resource* mr);
 
 /**
  * @copydoc cudf::drop_nans(table_view const&, std::vector<size_type> const&,
@@ -45,50 +44,46 @@ std::unique_ptr<table> drop_nulls(
  *
  * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  */
-std::unique_ptr<table> drop_nans(
-  table_view const& input,
-  std::vector<size_type> const& keys,
-  cudf::size_type keep_threshold,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<table> drop_nans(table_view const& input,
+                                 std::vector<size_type> const& keys,
+                                 cudf::size_type keep_threshold,
+                                 rmm::cuda_stream_view stream,
+                                 rmm::mr::device_memory_resource* mr);
 
 /**
  * @copydoc cudf::apply_boolean_mask
  *
  * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  */
-std::unique_ptr<table> apply_boolean_mask(
-  table_view const& input,
-  column_view const& boolean_mask,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<table> apply_boolean_mask(table_view const& input,
+                                          column_view const& boolean_mask,
+                                          rmm::cuda_stream_view stream,
+                                          rmm::mr::device_memory_resource* mr);
 
 /**
  * @copydoc cudf::unique
  *
  * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  */
-std::unique_ptr<table> unique(
-  table_view const& input,
-  std::vector<size_type> const& keys,
-  duplicate_keep_option keep,
-  null_equality nulls_equal           = null_equality::EQUAL,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<table> unique(table_view const& input,
+                              std::vector<size_type> const& keys,
+                              duplicate_keep_option keep,
+                              null_equality nulls_equal,
+                              rmm::cuda_stream_view stream,
+                              rmm::mr::device_memory_resource* mr);
 
 /**
  * @copydoc cudf::distinct
  *
  * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  */
-std::unique_ptr<table> distinct(
-  table_view const& input,
-  std::vector<size_type> const& keys,
-  duplicate_keep_option keep          = duplicate_keep_option::KEEP_ANY,
-  null_equality nulls_equal           = null_equality::EQUAL,
-  nan_equality nans_equal             = nan_equality::ALL_EQUAL,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<table> distinct(table_view const& input,
+                                std::vector<size_type> const& keys,
+                                duplicate_keep_option keep,
+                                null_equality nulls_equal,
+                                nan_equality nans_equal,
+                                rmm::cuda_stream_view stream,
+                                rmm::mr::device_memory_resource* mr);
 
 /**
  * @brief Create a new table without duplicate rows.
@@ -110,14 +105,13 @@ std::unique_ptr<table> distinct(
  * @param mr Device memory resource used to allocate the returned table
  * @return A table containing the resulting distinct rows
  */
-std::unique_ptr<table> stable_distinct(
-  table_view const& input,
-  std::vector<size_type> const& keys,
-  duplicate_keep_option keep          = duplicate_keep_option::KEEP_ANY,
-  null_equality nulls_equal           = null_equality::EQUAL,
-  nan_equality nans_equal             = nan_equality::ALL_EQUAL,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<table> stable_distinct(table_view const& input,
+                                       std::vector<size_type> const& keys,
+                                       duplicate_keep_option keep,
+                                       null_equality nulls_equal,
+                                       nan_equality nans_equal,
+                                       rmm::cuda_stream_view stream,
+                                       rmm::mr::device_memory_resource* mr);
 
 /**
  * @brief Create a column of indices of all distinct rows in the input table.
@@ -133,13 +127,12 @@ std::unique_ptr<table> stable_distinct(
  * @param mr Device memory resource used to allocate the returned vector
  * @return A device_uvector containing the result indices
  */
-rmm::device_uvector<size_type> get_distinct_indices(
-  table_view const& input,
-  duplicate_keep_option keep          = duplicate_keep_option::KEEP_ANY,
-  null_equality nulls_equal           = null_equality::EQUAL,
-  nan_equality nans_equal             = nan_equality::ALL_EQUAL,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+rmm::device_uvector<size_type> get_distinct_indices(table_view const& input,
+                                                    duplicate_keep_option keep,
+                                                    null_equality nulls_equal,
+                                                    nan_equality nans_equal,
+                                                    rmm::cuda_stream_view stream,
+                                                    rmm::mr::device_memory_resource* mr);
 
 /**
  * @copydoc cudf::unique_count(column_view const&, null_policy, nan_policy)
diff --git a/cpp/src/copying/scatter.cu b/cpp/src/copying/scatter.cu
index 95248f0d98e..273ecaf9adb 100644
--- a/cpp/src/copying/scatter.cu
+++ b/cpp/src/copying/scatter.cu
@@ -393,7 +393,7 @@ std::unique_ptr<column> boolean_mask_scatter(column_view const& input,
 
   // The scatter map is actually a table with only one column, which is scatter map.
   auto scatter_map =
-    detail::apply_boolean_mask(table_view{{indices->view()}}, boolean_mask, stream);
+    detail::apply_boolean_mask(table_view{{indices->view()}}, boolean_mask, stream, mr);
   auto output_table = detail::scatter(
     table_view{{input}}, scatter_map->get_column(0).view(), table_view{{target}}, stream, mr);
 
diff --git a/cpp/src/stream_compaction/distinct.cu b/cpp/src/stream_compaction/distinct.cu
index e15d54b4251..9a5a52a5b90 100644
--- a/cpp/src/stream_compaction/distinct.cu
+++ b/cpp/src/stream_compaction/distinct.cu
@@ -146,7 +146,7 @@ std::unique_ptr<table> distinct(table_view const& input,
   }
 
   auto const gather_map =
-    get_distinct_indices(input.select(keys), keep, nulls_equal, nans_equal, stream);
+    get_distinct_indices(input.select(keys), keep, nulls_equal, nans_equal, stream, mr);
   return detail::gather(input,
                         gather_map,
                         out_of_bounds_policy::DONT_CHECK,
diff --git a/cpp/src/stream_compaction/stable_distinct.cu b/cpp/src/stream_compaction/stable_distinct.cu
index dc80a454777..142018d4336 100644
--- a/cpp/src/stream_compaction/stable_distinct.cu
+++ b/cpp/src/stream_compaction/stable_distinct.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -39,7 +39,7 @@ std::unique_ptr<table> stable_distinct(table_view const& input,
   }
 
   auto const distinct_indices =
-    get_distinct_indices(input.select(keys), keep, nulls_equal, nans_equal, stream);
+    get_distinct_indices(input.select(keys), keep, nulls_equal, nans_equal, stream, mr);
 
   // Markers to denote which rows to be copied to the output.
   auto const output_markers = [&] {

From 385b629e43e3e5866177e1ff6174c23203852e57 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Wed, 1 Mar 2023 21:36:06 -0800
Subject: [PATCH 05/17] Some binary_ops.

---
 cpp/src/binaryop/compiled/binary_ops.hpp | 80 +++++++++++-------------
 1 file changed, 37 insertions(+), 43 deletions(-)

diff --git a/cpp/src/binaryop/compiled/binary_ops.hpp b/cpp/src/binaryop/compiled/binary_ops.hpp
index c51993409ef..47fd50c5d97 100644
--- a/cpp/src/binaryop/compiled/binary_ops.hpp
+++ b/cpp/src/binaryop/compiled/binary_ops.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2018-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -32,29 +32,26 @@ class mutable_column_device_view;
 namespace binops {
 namespace compiled {
 
-std::unique_ptr<column> string_null_min_max(
-  scalar const& lhs,
-  column_view const& rhs,
-  binary_operator op,
-  data_type output_type,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> string_null_min_max(scalar const& lhs,
+                                            column_view const& rhs,
+                                            binary_operator op,
+                                            data_type output_type,
+                                            rmm::cuda_stream_view stream,
+                                            rmm::mr::device_memory_resource* mr);
 
-std::unique_ptr<column> string_null_min_max(
-  column_view const& lhs,
-  scalar const& rhs,
-  binary_operator op,
-  data_type output_type,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> string_null_min_max(column_view const& lhs,
+                                            scalar const& rhs,
+                                            binary_operator op,
+                                            data_type output_type,
+                                            rmm::cuda_stream_view stream,
+                                            rmm::mr::device_memory_resource* mr);
 
-std::unique_ptr<column> string_null_min_max(
-  column_view const& lhs,
-  column_view const& rhs,
-  binary_operator op,
-  data_type output_type,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> string_null_min_max(column_view const& lhs,
+                                            column_view const& rhs,
+                                            binary_operator op,
+                                            data_type output_type,
+                                            rmm::cuda_stream_view stream,
+                                            rmm::mr::device_memory_resource* mr);
 
 /**
  * @brief Performs a binary operation between a string scalar and a string
@@ -75,13 +72,12 @@ std::unique_ptr<column> string_null_min_max(
  * @param mr          Device memory resource used to allocate the returned column's device memory
  * @return std::unique_ptr<column> Output column
  */
-std::unique_ptr<column> binary_operation(
-  scalar const& lhs,
-  column_view const& rhs,
-  binary_operator op,
-  data_type output_type,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> binary_operation(scalar const& lhs,
+                                         column_view const& rhs,
+                                         binary_operator op,
+                                         data_type output_type,
+                                         rmm::cuda_stream_view stream,
+                                         rmm::mr::device_memory_resource* mr);
 
 /**
  * @brief Performs a binary operation between a string column and a string
@@ -102,13 +98,12 @@ std::unique_ptr<column> binary_operation(
  * @param mr          Device memory resource used to allocate the returned column's device memory
  * @return std::unique_ptr<column> Output column
  */
-std::unique_ptr<column> binary_operation(
-  column_view const& lhs,
-  scalar const& rhs,
-  binary_operator op,
-  data_type output_type,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> binary_operation(column_view const& lhs,
+                                         scalar const& rhs,
+                                         binary_operator op,
+                                         data_type output_type,
+                                         rmm::cuda_stream_view stream,
+                                         rmm::mr::device_memory_resource* mr);
 
 /**
  * @brief Performs a binary operation between two string columns.
@@ -128,13 +123,12 @@ std::unique_ptr<column> binary_operation(
  * @param mr          Device memory resource used to allocate the returned column's device memory
  * @return std::unique_ptr<column> Output column
  */
-std::unique_ptr<column> binary_operation(
-  column_view const& lhs,
-  column_view const& rhs,
-  binary_operator op,
-  data_type output_type,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> binary_operation(column_view const& lhs,
+                                         column_view const& rhs,
+                                         binary_operator op,
+                                         data_type output_type,
+                                         rmm::cuda_stream_view stream,
+                                         rmm::mr::device_memory_resource* mr);
 
 void binary_operation(mutable_column_view& out,
                       scalar const& lhs,

From 2f24667dc237c31529ba1b766f16124a553cd8f3 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 6 Mar 2023 11:04:37 -0800
Subject: [PATCH 06/17] Some stream_compaction.

---
 cpp/include/cudf/detail/stream_compaction.hpp | 8 ++++----
 cpp/src/stream_compaction/distinct_count.cu   | 3 ++-
 cpp/src/stream_compaction/unique_count.cu     | 5 +++--
 3 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/cpp/include/cudf/detail/stream_compaction.hpp b/cpp/include/cudf/detail/stream_compaction.hpp
index e6cf9bb2c50..e0fc7b71cd9 100644
--- a/cpp/include/cudf/detail/stream_compaction.hpp
+++ b/cpp/include/cudf/detail/stream_compaction.hpp
@@ -150,8 +150,8 @@ cudf::size_type unique_count(column_view const& input,
  * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  */
 cudf::size_type unique_count(table_view const& input,
-                             null_equality nulls_equal    = null_equality::EQUAL,
-                             rmm::cuda_stream_view stream = cudf::get_default_stream());
+                             null_equality nulls_equal,
+                             rmm::cuda_stream_view stream);
 
 /**
  * @copydoc cudf::distinct_count(column_view const&, null_policy, nan_policy)
@@ -169,8 +169,8 @@ cudf::size_type distinct_count(column_view const& input,
  * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  */
 cudf::size_type distinct_count(table_view const& input,
-                               null_equality nulls_equal    = null_equality::EQUAL,
-                               rmm::cuda_stream_view stream = cudf::get_default_stream());
+                               null_equality nulls_equal,
+                               rmm::cuda_stream_view stream);
 
 }  // namespace detail
 }  // namespace cudf
diff --git a/cpp/src/stream_compaction/distinct_count.cu b/cpp/src/stream_compaction/distinct_count.cu
index 8cde6e0a7ed..fb6505daf06 100644
--- a/cpp/src/stream_compaction/distinct_count.cu
+++ b/cpp/src/stream_compaction/distinct_count.cu
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include "cudf/utilities/default_stream.hpp"
 #include "stream_compaction_common.cuh"
 #include "stream_compaction_common.hpp"
 
@@ -193,6 +194,6 @@ cudf::size_type distinct_count(column_view const& input,
 cudf::size_type distinct_count(table_view const& input, null_equality nulls_equal)
 {
   CUDF_FUNC_RANGE();
-  return detail::distinct_count(input, nulls_equal);
+  return detail::distinct_count(input, nulls_equal, cudf::get_default_stream());
 }
 }  // namespace cudf
diff --git a/cpp/src/stream_compaction/unique_count.cu b/cpp/src/stream_compaction/unique_count.cu
index 8363ee8120b..11a0226e269 100644
--- a/cpp/src/stream_compaction/unique_count.cu
+++ b/cpp/src/stream_compaction/unique_count.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include "cudf/utilities/default_stream.hpp"
 #include "stream_compaction_common.cuh"
 #include "stream_compaction_common.hpp"
 
@@ -133,7 +134,7 @@ cudf::size_type unique_count(column_view const& input,
 cudf::size_type unique_count(table_view const& input, null_equality nulls_equal)
 {
   CUDF_FUNC_RANGE();
-  return detail::unique_count(input, nulls_equal);
+  return detail::unique_count(input, nulls_equal, cudf::get_default_stream());
 }
 
 }  // namespace cudf

From a50bf6b34037b910f54d1059bd3fe4e69e768dc2 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 7 Mar 2023 12:50:47 -0800
Subject: [PATCH 07/17] Use current memory resource for temporaries instead of
 user-provided one.

---
 cpp/src/copying/scatter.cu                   | 7 ++++---
 cpp/src/sort/segmented_sort_impl.cuh         | 8 ++++++--
 cpp/src/stream_compaction/distinct.cu        | 9 +++++++--
 cpp/src/stream_compaction/stable_distinct.cu | 8 ++++++--
 4 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/cpp/src/copying/scatter.cu b/cpp/src/copying/scatter.cu
index 273ecaf9adb..316f39b616c 100644
--- a/cpp/src/copying/scatter.cu
+++ b/cpp/src/copying/scatter.cu
@@ -253,7 +253,8 @@ struct column_scalar_scatterer_impl<struct_view, MapIterator> {
 
     auto scatter_functor   = column_scalar_scatterer<decltype(scatter_iter)>{};
     auto fields_iter_begin = make_counting_transform_iterator(0, [&](auto const& i) {
-      auto row_slr = get_element(typed_s->view().column(i), 0, stream, mr);
+      auto row_slr =
+        get_element(typed_s->view().column(i), 0, stream, rmm::mr::get_current_device_resource());
       return type_dispatcher<dispatch_storage_type>(row_slr->type(),
                                                     scatter_functor,
                                                     *row_slr,
@@ -392,8 +393,8 @@ std::unique_ptr<column> boolean_mask_scatter(column_view const& input,
                    0);
 
   // The scatter map is actually a table with only one column, which is scatter map.
-  auto scatter_map =
-    detail::apply_boolean_mask(table_view{{indices->view()}}, boolean_mask, stream, mr);
+  auto scatter_map = detail::apply_boolean_mask(
+    table_view{{indices->view()}}, boolean_mask, stream, rmm::mr::get_current_device_resource());
   auto output_table = detail::scatter(
     table_view{{input}}, scatter_map->get_column(0).view(), table_view{{target}}, stream, mr);
 
diff --git a/cpp/src/sort/segmented_sort_impl.cuh b/cpp/src/sort/segmented_sort_impl.cuh
index 8c1d57f79bc..8a367209acf 100644
--- a/cpp/src/sort/segmented_sort_impl.cuh
+++ b/cpp/src/sort/segmented_sort_impl.cuh
@@ -24,6 +24,7 @@
 #include <rmm/device_uvector.hpp>
 
 #include <cub/device/device_segmented_sort.cuh>
+#include <rmm/mr/device/per_device_resource.hpp>
 
 namespace cudf {
 namespace detail {
@@ -73,8 +74,11 @@ struct column_fast_sort_fn {
   {
     // CUB's segmented sort functions cannot accept iterators.
     // We create a temporary column here for it to use.
-    auto temp_col =
-      cudf::detail::allocate_like(input, input.size(), mask_allocation_policy::NEVER, stream, mr);
+    auto temp_col                   = cudf::detail::allocate_like(input,
+                                                input.size(),
+                                                mask_allocation_policy::NEVER,
+                                                stream,
+                                                rmm::mr::get_current_device_resource());
     mutable_column_view output_view = temp_col->mutable_view();
 
     // DeviceSegmentedSort is faster than DeviceSegmentedRadixSort at this time
diff --git a/cpp/src/stream_compaction/distinct.cu b/cpp/src/stream_compaction/distinct.cu
index 9a5a52a5b90..83c97521abf 100644
--- a/cpp/src/stream_compaction/distinct.cu
+++ b/cpp/src/stream_compaction/distinct.cu
@@ -25,6 +25,7 @@
 #include <cudf/table/table_view.hpp>
 #include <cudf/types.hpp>
 
+#include <rmm/mr/device/per_device_resource.hpp>
 #include <thrust/copy.h>
 #include <thrust/distance.h>
 #include <thrust/iterator/counting_iterator.h>
@@ -145,8 +146,12 @@ std::unique_ptr<table> distinct(table_view const& input,
     return empty_like(input);
   }
 
-  auto const gather_map =
-    get_distinct_indices(input.select(keys), keep, nulls_equal, nans_equal, stream, mr);
+  auto const gather_map = get_distinct_indices(input.select(keys),
+                                               keep,
+                                               nulls_equal,
+                                               nans_equal,
+                                               stream,
+                                               rmm::mr::get_current_device_resource());
   return detail::gather(input,
                         gather_map,
                         out_of_bounds_policy::DONT_CHECK,
diff --git a/cpp/src/stream_compaction/stable_distinct.cu b/cpp/src/stream_compaction/stable_distinct.cu
index 142018d4336..d45897930b0 100644
--- a/cpp/src/stream_compaction/stable_distinct.cu
+++ b/cpp/src/stream_compaction/stable_distinct.cu
@@ -38,8 +38,12 @@ std::unique_ptr<table> stable_distinct(table_view const& input,
     return empty_like(input);
   }
 
-  auto const distinct_indices =
-    get_distinct_indices(input.select(keys), keep, nulls_equal, nans_equal, stream, mr);
+  auto const distinct_indices = get_distinct_indices(input.select(keys),
+                                                     keep,
+                                                     nulls_equal,
+                                                     nans_equal,
+                                                     stream,
+                                                     rmm::mr::get_current_device_resource());
 
   // Markers to denote which rows to be copied to the output.
   auto const output_markers = [&] {

From ba57a94f8413f5e2d6f2b5f7fcf0386f5885034a Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Wed, 8 Mar 2023 08:10:07 -0800
Subject: [PATCH 08/17] Bump


From fc227b71a98c616907a5e8ceba984e7094191a41 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Wed, 8 Mar 2023 14:24:28 -0500
Subject: [PATCH 09/17] Remove unused mr parameters.

---
 cpp/src/sort/segmented_sort_impl.cuh | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/cpp/src/sort/segmented_sort_impl.cuh b/cpp/src/sort/segmented_sort_impl.cuh
index 8a367209acf..46195e1d2cc 100644
--- a/cpp/src/sort/segmented_sort_impl.cuh
+++ b/cpp/src/sort/segmented_sort_impl.cuh
@@ -69,8 +69,7 @@ struct column_fast_sort_fn {
                  column_view const& segment_offsets,
                  mutable_column_view& indices,
                  bool ascending,
-                 rmm::cuda_stream_view stream,
-                 rmm::mr::device_memory_resource* mr)
+                 rmm::cuda_stream_view stream)
   {
     // CUB's segmented sort functions cannot accept iterators.
     // We create a temporary column here for it to use.
@@ -133,19 +132,14 @@ struct column_fast_sort_fn {
                   column_view const& segment_offsets,
                   mutable_column_view& indices,
                   bool ascending,
-                  rmm::cuda_stream_view stream,
-                  rmm::mr::device_memory_resource* mr)
+                  rmm::cuda_stream_view stream)
   {
-    fast_sort<T>(input, segment_offsets, indices, ascending, stream, mr);
+    fast_sort<T>(input, segment_offsets, indices, ascending, stream);
   }
 
   template <typename T, CUDF_ENABLE_IF(!is_fast_sort_supported<T>())>
-  void operator()(column_view const&,
-                  column_view const&,
-                  mutable_column_view&,
-                  bool,
-                  rmm::cuda_stream_view,
-                  rmm::mr::device_memory_resource*)
+  void operator()(
+    column_view const&, column_view const&, mutable_column_view&, bool, rmm::cuda_stream_view)
   {
     CUDF_FAIL("Column type cannot be used with fast-sort function");
   }

From d0371d10d233e2cd0044661e68b75885336dfc4a Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Wed, 8 Mar 2023 14:34:48 -0500
Subject: [PATCH 10/17] Remove one more.

---
 cpp/src/sort/segmented_sort_impl.cuh | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/cpp/src/sort/segmented_sort_impl.cuh b/cpp/src/sort/segmented_sort_impl.cuh
index 46195e1d2cc..b1fce380e28 100644
--- a/cpp/src/sort/segmented_sort_impl.cuh
+++ b/cpp/src/sort/segmented_sort_impl.cuh
@@ -175,8 +175,7 @@ std::unique_ptr<column> fast_segmented_sorted_order(column_view const& input,
                                                segment_offsets,
                                                indices_view,
                                                column_order == order::ASCENDING,
-                                               stream,
-                                               mr);
+                                               stream);
   return sorted_indices;
 }
 

From c6ad4cfd31f0075a57061b5d92407630f99db91b Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 9 Mar 2023 07:39:57 -0800
Subject: [PATCH 11/17] Reorder includes.

---
 cpp/src/sort/segmented_sort_impl.cuh        | 2 +-
 cpp/src/stream_compaction/distinct_count.cu | 2 +-
 cpp/src/stream_compaction/unique_count.cu   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/cpp/src/sort/segmented_sort_impl.cuh b/cpp/src/sort/segmented_sort_impl.cuh
index b1fce380e28..b7347974173 100644
--- a/cpp/src/sort/segmented_sort_impl.cuh
+++ b/cpp/src/sort/segmented_sort_impl.cuh
@@ -22,9 +22,9 @@
 
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/device_uvector.hpp>
+#include <rmm/mr/device/per_device_resource.hpp>
 
 #include <cub/device/device_segmented_sort.cuh>
-#include <rmm/mr/device/per_device_resource.hpp>
 
 namespace cudf {
 namespace detail {
diff --git a/cpp/src/stream_compaction/distinct_count.cu b/cpp/src/stream_compaction/distinct_count.cu
index c5e0d9c6060..0dae26c18a9 100644
--- a/cpp/src/stream_compaction/distinct_count.cu
+++ b/cpp/src/stream_compaction/distinct_count.cu
@@ -14,7 +14,6 @@
  * limitations under the License.
  */
 
-#include "cudf/utilities/default_stream.hpp"
 #include "stream_compaction_common.cuh"
 #include "stream_compaction_common.hpp"
 
@@ -29,6 +28,7 @@
 #include <cudf/stream_compaction.hpp>
 #include <cudf/table/experimental/row_operators.cuh>
 #include <cudf/table/table_view.hpp>
+#include <cudf/utilities/default_stream.hpp>
 #include <cudf/utilities/type_dispatcher.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
diff --git a/cpp/src/stream_compaction/unique_count.cu b/cpp/src/stream_compaction/unique_count.cu
index 5876363a61b..4c1cf2b2bc3 100644
--- a/cpp/src/stream_compaction/unique_count.cu
+++ b/cpp/src/stream_compaction/unique_count.cu
@@ -14,7 +14,6 @@
  * limitations under the License.
  */
 
-#include "cudf/utilities/default_stream.hpp"
 #include "stream_compaction_common.cuh"
 #include "stream_compaction_common.hpp"
 
@@ -29,6 +28,7 @@
 #include <cudf/stream_compaction.hpp>
 #include <cudf/table/experimental/row_operators.cuh>
 #include <cudf/table/table_view.hpp>
+#include <cudf/utilities/default_stream.hpp>
 #include <cudf/utilities/type_dispatcher.hpp>
 
 #include <rmm/cuda_stream_view.hpp>

From 489985b1fee2734ca8cafdb69271ea18e798e7b5 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 9 Mar 2023 14:34:23 -0800
Subject: [PATCH 12/17] Update java call.

---
 java/src/main/native/src/ColumnViewJni.cu | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/java/src/main/native/src/ColumnViewJni.cu b/java/src/main/native/src/ColumnViewJni.cu
index 7e0b0f9330d..6914d29c8c0 100644
--- a/java/src/main/native/src/ColumnViewJni.cu
+++ b/java/src/main/native/src/ColumnViewJni.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -31,6 +31,7 @@
 #include <cudf/utilities/span.hpp>
 #include <rmm/device_uvector.hpp>
 #include <rmm/exec_policy.hpp>
+#include <rmm/mr/device/per_device_resource.hpp>
 #include <thrust/functional.h>
 #include <thrust/logical.h>
 #include <thrust/scan.h>
@@ -192,7 +193,7 @@ std::unique_ptr<cudf::column> lists_distinct_by_key(cudf::lists_column_view cons
                                      child.child(0), child.child(1)}}, // input table
                          std::vector<size_type>{0, 1},                 // key columns
                          cudf::duplicate_keep_option::KEEP_LAST, cudf::null_equality::EQUAL,
-                         cudf::nan_equality::ALL_EQUAL, stream)
+                         cudf::nan_equality::ALL_EQUAL, stream, rmm::mr::get_current_device_resource())
                          ->release();
   auto const out_labels = out_columns.front()->view();
 

From 146afb2b083b96083c5dc8aa3dd3d6f2ec8bee73 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 9 Mar 2023 15:14:13 -0800
Subject: [PATCH 13/17] clang-format.

---
 java/src/main/native/src/ColumnViewJni.cu | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/java/src/main/native/src/ColumnViewJni.cu b/java/src/main/native/src/ColumnViewJni.cu
index 6914d29c8c0..8a2c0b2b411 100644
--- a/java/src/main/native/src/ColumnViewJni.cu
+++ b/java/src/main/native/src/ColumnViewJni.cu
@@ -188,13 +188,14 @@ std::unique_ptr<cudf::column> lists_distinct_by_key(cudf::lists_column_view cons
   // Use `cudf::duplicate_keep_option::KEEP_LAST` so this will produce the desired behavior when
   // being called in `create_map` in spark-rapids.
   // Other options comparing nulls and NaNs are set as all-equal.
-  auto out_columns = cudf::detail::stable_distinct(
-                         table_view{{column_view{cudf::device_span<cudf::size_type const>{labels}},
-                                     child.child(0), child.child(1)}}, // input table
-                         std::vector<size_type>{0, 1},                 // key columns
-                         cudf::duplicate_keep_option::KEEP_LAST, cudf::null_equality::EQUAL,
-                         cudf::nan_equality::ALL_EQUAL, stream, rmm::mr::get_current_device_resource())
-                         ->release();
+  auto out_columns =
+      cudf::detail::stable_distinct(
+          table_view{{column_view{cudf::device_span<cudf::size_type const>{labels}}, child.child(0),
+                      child.child(1)}}, // input table
+          std::vector<size_type>{0, 1}, // key columns
+          cudf::duplicate_keep_option::KEEP_LAST, cudf::null_equality::EQUAL,
+          cudf::nan_equality::ALL_EQUAL, stream, rmm::mr::get_current_device_resource())
+          ->release();
   auto const out_labels = out_columns.front()->view();
 
   // Assemble a structs column of <out_keys, out_vals>.

From 91cb17741bd9ee32b189442a6f5fd2e7425f558e Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyas.ramasubramani@gmail.com>
Date: Fri, 10 Mar 2023 15:36:44 -0500
Subject: [PATCH 14/17] Update cpp/src/stream_compaction/distinct.cu

Co-authored-by: David Wendt <45795991+davidwendt@users.noreply.github.com>
---
 cpp/src/stream_compaction/distinct.cu | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cpp/src/stream_compaction/distinct.cu b/cpp/src/stream_compaction/distinct.cu
index 83c97521abf..083b1b2eb46 100644
--- a/cpp/src/stream_compaction/distinct.cu
+++ b/cpp/src/stream_compaction/distinct.cu
@@ -26,6 +26,7 @@
 #include <cudf/types.hpp>
 
 #include <rmm/mr/device/per_device_resource.hpp>
+
 #include <thrust/copy.h>
 #include <thrust/distance.h>
 #include <thrust/iterator/counting_iterator.h>

From 791d873f1feb31391a60619a165d5d4a22fc761b Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 13 Mar 2023 07:21:36 -0700
Subject: [PATCH 15/17] Go back to calling detail::gather and pass the now
 required parameters.

---
 cpp/tests/copying/detail_gather_tests.cu | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/cpp/tests/copying/detail_gather_tests.cu b/cpp/tests/copying/detail_gather_tests.cu
index d4898113afb..89d015ff275 100644
--- a/cpp/tests/copying/detail_gather_tests.cu
+++ b/cpp/tests/copying/detail_gather_tests.cu
@@ -46,11 +46,9 @@ TYPED_TEST_SUITE(GatherTest, cudf::test::NumericTypes);
 TYPED_TEST(GatherTest, GatherDetailDeviceVectorTest)
 {
   constexpr cudf::size_type source_size{1000};
-  rmm::device_uvector<cudf::size_type> gather_map_vec(source_size, cudf::get_default_stream());
-  auto gather_map = std::make_unique<cudf::column>(std::move(gather_map_vec));
-  thrust::sequence(rmm::exec_policy_nosync(cudf::get_default_stream()),
-                   gather_map->mutable_view().begin<cudf::size_type>(),
-                   gather_map->mutable_view().end<cudf::size_type>());
+  rmm::device_uvector<cudf::size_type> gather_map(source_size, cudf::get_default_stream());
+  thrust::sequence(
+    rmm::exec_policy_nosync(cudf::get_default_stream()), gather_map.begin(), gather_map.end());
 
   auto data = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i; });
   cudf::test::fixed_width_column_wrapper<TypeParam> source_column(data, data + source_size);
@@ -59,7 +57,13 @@ TYPED_TEST(GatherTest, GatherDetailDeviceVectorTest)
 
   // test with device vector iterators
   {
-    std::unique_ptr<cudf::table> result = cudf::gather(source_table, gather_map->view());
+    std::unique_ptr<cudf::table> result =
+      cudf::detail::gather(source_table,
+                           gather_map.begin(),
+                           gather_map.end(),
+                           cudf::out_of_bounds_policy::DONT_CHECK,
+                           cudf::get_default_stream(),
+                           rmm::mr::get_current_device_resource());
 
     for (auto i = 0; i < source_table.num_columns(); ++i) {
       CUDF_TEST_EXPECT_COLUMNS_EQUAL(source_table.column(i), result->view().column(i));
@@ -70,7 +74,13 @@ TYPED_TEST(GatherTest, GatherDetailDeviceVectorTest)
 
   // test with raw pointers
   {
-    std::unique_ptr<cudf::table> result = cudf::gather(source_table, gather_map->view());
+    std::unique_ptr<cudf::table> result =
+      cudf::detail::gather(source_table,
+                           gather_map.begin(),
+                           gather_map.data() + gather_map.size(),
+                           cudf::out_of_bounds_policy::DONT_CHECK,
+                           cudf::get_default_stream(),
+                           rmm::mr::get_current_device_resource());
 
     for (auto i = 0; i < source_table.num_columns(); ++i) {
       CUDF_TEST_EXPECT_COLUMNS_EQUAL(source_table.column(i), result->view().column(i));

From 3a3e8f36969874f00e61fc004e0d2dfe7abe86d0 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 13 Mar 2023 07:34:50 -0700
Subject: [PATCH 16/17] Remove default mr from detail::gather and update all
 call sites.

---
 cpp/benchmarks/common/generate_input.cu    |  4 +++-
 cpp/include/cudf/detail/gather.hpp         | 28 ++++++++++------------
 cpp/src/copying/copy.cu                    |  6 +++--
 cpp/src/rolling/detail/lead_lag_nested.cuh |  6 +++--
 cpp/tests/copying/detail_gather_tests.cu   |  3 ++-
 cpp/tests/copying/gather_str_tests.cpp     | 15 ++++++++----
 6 files changed, 36 insertions(+), 26 deletions(-)

diff --git a/cpp/benchmarks/common/generate_input.cu b/cpp/benchmarks/common/generate_input.cu
index 2829d14070c..a4650c369ba 100644
--- a/cpp/benchmarks/common/generate_input.cu
+++ b/cpp/benchmarks/common/generate_input.cu
@@ -32,6 +32,7 @@
 #include <rmm/device_buffer.hpp>
 #include <rmm/device_uvector.hpp>
 
+#include <rmm/mr/device/per_device_resource.hpp>
 #include <thrust/binary_search.h>
 #include <thrust/copy.h>
 #include <thrust/device_ptr.h>
@@ -542,7 +543,8 @@ std::unique_ptr<cudf::column> create_random_column<cudf::string_view>(data_profi
                                         sample_indices,
                                         cudf::out_of_bounds_policy::DONT_CHECK,
                                         cudf::detail::negative_index_policy::NOT_ALLOWED,
-                                        cudf::get_default_stream());
+                                        cudf::get_default_stream(),
+                                        rmm::mr::get_current_device_resource());
   return std::move(str_table->release()[0]);
 }
 
diff --git a/cpp/include/cudf/detail/gather.hpp b/cpp/include/cudf/detail/gather.hpp
index 9d61a8de184..034eb6c1282 100644
--- a/cpp/include/cudf/detail/gather.hpp
+++ b/cpp/include/cudf/detail/gather.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -61,13 +61,12 @@ enum class negative_index_policy : bool { ALLOWED, NOT_ALLOWED };
  * @param[in] mr Device memory resource used to allocate the returned table's device memory
  * @return Result of the gather
  */
-std::unique_ptr<table> gather(
-  table_view const& source_table,
-  column_view const& gather_map,
-  out_of_bounds_policy bounds_policy,
-  negative_index_policy neg_indices,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<table> gather(table_view const& source_table,
+                              column_view const& gather_map,
+                              out_of_bounds_policy bounds_policy,
+                              negative_index_policy neg_indices,
+                              rmm::cuda_stream_view stream,
+                              rmm::mr::device_memory_resource* mr);
 
 /**
  * @copydoc cudf::detail::gather(table_view const&,column_view const&,table_view
@@ -76,13 +75,12 @@ std::unique_ptr<table> gather(
  *
  * @throws cudf::logic_error if `gather_map` span size is larger than max of `size_type`.
  */
-std::unique_ptr<table> gather(
-  table_view const& source_table,
-  device_span<size_type const> const gather_map,
-  out_of_bounds_policy bounds_policy,
-  negative_index_policy neg_indices,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<table> gather(table_view const& source_table,
+                              device_span<size_type const> const gather_map,
+                              out_of_bounds_policy bounds_policy,
+                              negative_index_policy neg_indices,
+                              rmm::cuda_stream_view stream,
+                              rmm::mr::device_memory_resource* mr);
 
 }  // namespace detail
 }  // namespace cudf
diff --git a/cpp/src/copying/copy.cu b/cpp/src/copying/copy.cu
index 0978cf441d8..9cea2152996 100644
--- a/cpp/src/copying/copy.cu
+++ b/cpp/src/copying/copy.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -30,6 +30,7 @@
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/device_uvector.hpp>
 
+#include <rmm/mr/device/per_device_resource.hpp>
 #include <thrust/copy.h>
 #include <thrust/distance.h>
 #include <thrust/iterator/counting_iterator.h>
@@ -174,7 +175,8 @@ std::unique_ptr<column> scatter_gather_based_if_else(cudf::column_view const& lh
                                                     gather_map,
                                                     out_of_bounds_policy::DONT_CHECK,
                                                     negative_index_policy::NOT_ALLOWED,
-                                                    stream);
+                                                    stream,
+                                                    rmm::mr::get_current_device_resource());
 
   auto result = cudf::detail::scatter(
     table_view{std::vector<column_view>{scatter_src_lhs->get_column(0).view()}},
diff --git a/cpp/src/rolling/detail/lead_lag_nested.cuh b/cpp/src/rolling/detail/lead_lag_nested.cuh
index 859ed7e5d53..17b12c74518 100644
--- a/cpp/src/rolling/detail/lead_lag_nested.cuh
+++ b/cpp/src/rolling/detail/lead_lag_nested.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -27,6 +27,7 @@
 
 #include <rmm/exec_policy.hpp>
 
+#include <rmm/mr/device/per_device_resource.hpp>
 #include <thrust/binary_search.h>
 #include <thrust/copy.h>
 #include <thrust/distance.h>
@@ -191,7 +192,8 @@ std::unique_ptr<column> compute_lead_lag_for_nested(aggregation::Kind op,
                          scatter_map,
                          out_of_bounds_policy::DONT_CHECK,
                          cudf::detail::negative_index_policy::NOT_ALLOWED,
-                         stream);
+                         stream,
+                         rmm::mr::get_current_device_resource());
 
   // Scatter defaults into locations where LEAD/LAG computed nulls.
   auto scattered_results = cudf::detail::scatter(
diff --git a/cpp/tests/copying/detail_gather_tests.cu b/cpp/tests/copying/detail_gather_tests.cu
index 89d015ff275..aae511413ef 100644
--- a/cpp/tests/copying/detail_gather_tests.cu
+++ b/cpp/tests/copying/detail_gather_tests.cu
@@ -107,7 +107,8 @@ TYPED_TEST(GatherTest, GatherDetailInvalidIndexTest)
                          gather_map,
                          cudf::out_of_bounds_policy::NULLIFY,
                          cudf::detail::negative_index_policy::NOT_ALLOWED,
-                         cudf::get_default_stream());
+                         cudf::get_default_stream(),
+                         rmm::mr::get_current_device_resource());
 
   auto expect_data =
     cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i % 2) ? 0 : i; });
diff --git a/cpp/tests/copying/gather_str_tests.cpp b/cpp/tests/copying/gather_str_tests.cpp
index 3db2ce399cc..95ab4b25642 100644
--- a/cpp/tests/copying/gather_str_tests.cpp
+++ b/cpp/tests/copying/gather_str_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -24,6 +24,7 @@
 #include <cudf/detail/gather.hpp>
 #include <cudf/table/table.hpp>
 #include <cudf/table/table_view.hpp>
+#include <rmm/mr/device/per_device_resource.hpp>
 
 class GatherTestStr : public cudf::test::BaseFixture {
 };
@@ -87,7 +88,8 @@ TEST_F(GatherTestStr, Gather)
                                       gather_map,
                                       cudf::out_of_bounds_policy::NULLIFY,
                                       cudf::detail::negative_index_policy::NOT_ALLOWED,
-                                      cudf::get_default_stream());
+                                      cudf::get_default_stream(),
+                                      rmm::mr::get_current_device_resource());
 
   std::vector<const char*> h_expected;
   std::vector<int32_t> expected_validity;
@@ -118,7 +120,8 @@ TEST_F(GatherTestStr, GatherDontCheckOutOfBounds)
                                       gather_map,
                                       cudf::out_of_bounds_policy::DONT_CHECK,
                                       cudf::detail::negative_index_policy::NOT_ALLOWED,
-                                      cudf::get_default_stream());
+                                      cudf::get_default_stream(),
+                                      rmm::mr::get_current_device_resource());
 
   std::vector<const char*> h_expected;
   for (auto itr = h_map.begin(); itr != h_map.end(); ++itr) {
@@ -137,7 +140,8 @@ TEST_F(GatherTestStr, GatherEmptyMapStringsColumn)
                                       gather_map,
                                       cudf::out_of_bounds_policy::NULLIFY,
                                       cudf::detail::negative_index_policy::NOT_ALLOWED,
-                                      cudf::get_default_stream());
+                                      cudf::get_default_stream(),
+                                      rmm::mr::get_current_device_resource());
   cudf::test::expect_column_empty(results->get_column(0).view());
 }
 
@@ -151,6 +155,7 @@ TEST_F(GatherTestStr, GatherZeroSizeStringsColumn)
                                       gather_map,
                                       cudf::out_of_bounds_policy::NULLIFY,
                                       cudf::detail::negative_index_policy::NOT_ALLOWED,
-                                      cudf::get_default_stream());
+                                      cudf::get_default_stream(),
+                                      rmm::mr::get_current_device_resource());
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, results->get_column(0).view());
 }

From 3deb09c6274100d077520cb085ae060d5893381a Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyas.ramasubramani@gmail.com>
Date: Mon, 13 Mar 2023 14:37:30 -0400
Subject: [PATCH 17/17] Apply suggestions from code review

Co-authored-by: David Wendt <45795991+davidwendt@users.noreply.github.com>
---
 cpp/benchmarks/common/generate_input.cu    | 2 +-
 cpp/src/copying/copy.cu                    | 2 +-
 cpp/src/rolling/detail/lead_lag_nested.cuh | 2 +-
 cpp/tests/copying/gather_str_tests.cpp     | 1 +
 4 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/cpp/benchmarks/common/generate_input.cu b/cpp/benchmarks/common/generate_input.cu
index 085332c697d..545028260b8 100644
--- a/cpp/benchmarks/common/generate_input.cu
+++ b/cpp/benchmarks/common/generate_input.cu
@@ -31,8 +31,8 @@
 
 #include <rmm/device_buffer.hpp>
 #include <rmm/device_uvector.hpp>
-
 #include <rmm/mr/device/per_device_resource.hpp>
+
 #include <thrust/binary_search.h>
 #include <thrust/copy.h>
 #include <thrust/device_ptr.h>
diff --git a/cpp/src/copying/copy.cu b/cpp/src/copying/copy.cu
index 9cea2152996..9ec00612f2f 100644
--- a/cpp/src/copying/copy.cu
+++ b/cpp/src/copying/copy.cu
@@ -29,8 +29,8 @@
 
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/device_uvector.hpp>
-
 #include <rmm/mr/device/per_device_resource.hpp>
+
 #include <thrust/copy.h>
 #include <thrust/distance.h>
 #include <thrust/iterator/counting_iterator.h>
diff --git a/cpp/src/rolling/detail/lead_lag_nested.cuh b/cpp/src/rolling/detail/lead_lag_nested.cuh
index 17b12c74518..d2fe9fabd1b 100644
--- a/cpp/src/rolling/detail/lead_lag_nested.cuh
+++ b/cpp/src/rolling/detail/lead_lag_nested.cuh
@@ -26,8 +26,8 @@
 #include <cudf/utilities/traits.hpp>
 
 #include <rmm/exec_policy.hpp>
-
 #include <rmm/mr/device/per_device_resource.hpp>
+
 #include <thrust/binary_search.h>
 #include <thrust/copy.h>
 #include <thrust/distance.h>
diff --git a/cpp/tests/copying/gather_str_tests.cpp b/cpp/tests/copying/gather_str_tests.cpp
index 95ab4b25642..7810566fbf1 100644
--- a/cpp/tests/copying/gather_str_tests.cpp
+++ b/cpp/tests/copying/gather_str_tests.cpp
@@ -24,6 +24,7 @@
 #include <cudf/detail/gather.hpp>
 #include <cudf/table/table.hpp>
 #include <cudf/table/table_view.hpp>
+
 #include <rmm/mr/device/per_device_resource.hpp>
 
 class GatherTestStr : public cudf::test::BaseFixture {