From 3393b060f06cb00fe846387b652a4b38d56b23a8 Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Wed, 24 Apr 2024 14:08:47 +1000 Subject: [PATCH] Convert device_memory_resource* to device_async_resource_ref (#4365) Closes #4333 For reviewers: Many of the changes are a simple textual replacement of `rmm::mr::device_memory_resource *` with `rmm::device_async_resource_ref`. However, I found that `concurrent_unordered_map` used in cuGraph is not up to date with the latest changes from the version in libcudf, so I copied those over as well. This gets rid of `hash_allocator.cuh` and uses `rmm::mr::polymorphic_allocator` instead of the custom allocator previously used. This obviates the need to update any `device_memory_resource*` used in the old custom allocator. Authors: - Mark Harris (https://github.com/harrism) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) URL: https://github.com/rapidsai/cugraph/pull/4365 --- cpp/include/cugraph/algorithms.hpp | 4 +- cpp/include/cugraph/dendrogram.hpp | 5 +- cpp/include/cugraph/legacy/functions.hpp | 3 +- cpp/include/cugraph/legacy/graph.hpp | 19 ++-- .../include/hash/concurrent_unordered_map.cuh | 101 ++++++++++-------- .../include/hash/hash_allocator.cuh | 99 ----------------- .../include/hash/helper_functions.cuh | 2 + cpp/src/converters/legacy/COOtoCSR.cu | 16 +-- cpp/src/converters/legacy/COOtoCSR.cuh | 17 +-- cpp/src/tree/legacy/mst.cu | 9 +- cpp/tests/utilities/base_fixture.hpp | 5 +- 11 files changed, 102 insertions(+), 178 deletions(-) delete mode 100644 cpp/libcugraph_etl/include/hash/hash_allocator.cuh diff --git a/cpp/include/cugraph/algorithms.hpp b/cpp/include/cugraph/algorithms.hpp index c817665b1cb..0caa151daac 100644 --- a/cpp/include/cugraph/algorithms.hpp +++ b/cpp/include/cugraph/algorithms.hpp @@ -23,6 +23,8 @@ #include #include +#include + #ifndef NO_CUGRAPH_OPS #include #endif @@ -830,7 +832,7 @@ template std::unique_ptr> minimum_spanning_tree( raft::handle_t const& handle, legacy::GraphCSRView const& graph, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); namespace subgraph { /** diff --git a/cpp/include/cugraph/dendrogram.hpp b/cpp/include/cugraph/dendrogram.hpp index beebec4fd3f..14da3f13b1e 100644 --- a/cpp/include/cugraph/dendrogram.hpp +++ b/cpp/include/cugraph/dendrogram.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
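A note for readers following the conversion pattern above: `rmm::device_async_resource_ref` is a non-owning, type-erased reference, and a raw `rmm::mr::device_memory_resource*` (including the pointer returned by `rmm::mr::get_current_device_resource()`) binds to it implicitly, which is why the defaulted arguments stay the same and call sites do not change. A minimal sketch of the pattern, using an illustrative function name that is not part of this PR:

```cpp
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_buffer.hpp>
#include <rmm/mr/device/per_device_resource.hpp>
#include <rmm/resource_ref.hpp>

#include <cstddef>

// Old form:
//   rmm::device_buffer make_scratch(std::size_t bytes, rmm::cuda_stream_view stream,
//     rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
// New form: the resource ref is taken by value and forwarded to anything that
// accepts a device_async_resource_ref (device_buffer and device_uvector already do).
rmm::device_buffer make_scratch(
  std::size_t bytes,
  rmm::cuda_stream_view stream,
  rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource())
{
  // The default argument still works: the raw pointer converts to the ref.
  return rmm::device_buffer{bytes, stream, mr};
}
```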
@@ -16,6 +16,7 @@ #pragma once #include +#include #include #include @@ -28,7 +29,7 @@ class Dendrogram { void add_level(vertex_t first_index, vertex_t num_verts, rmm::cuda_stream_view stream_view, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) { level_ptr_.push_back( std::make_unique>(num_verts, stream_view, mr)); diff --git a/cpp/include/cugraph/legacy/functions.hpp b/cpp/include/cugraph/legacy/functions.hpp index 1efa2294847..51f05a6d26d 100644 --- a/cpp/include/cugraph/legacy/functions.hpp +++ b/cpp/include/cugraph/legacy/functions.hpp @@ -20,6 +20,7 @@ #include #include +#include namespace cugraph { @@ -43,7 +44,7 @@ namespace cugraph { template std::unique_ptr> coo_to_csr( legacy::GraphCOOView const& graph, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Broadcast using handle communicator diff --git a/cpp/include/cugraph/legacy/graph.hpp b/cpp/include/cugraph/legacy/graph.hpp index e64cbf32d4c..19cd5bbd6d0 100644 --- a/cpp/include/cugraph/legacy/graph.hpp +++ b/cpp/include/cugraph/legacy/graph.hpp @@ -17,6 +17,7 @@ #include #include +#include #include @@ -349,9 +350,9 @@ class GraphCOO { */ GraphCOO(vertex_t number_of_vertices, edge_t number_of_edges, - bool has_data = false, - cudaStream_t stream = nullptr, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + bool has_data = false, + cudaStream_t stream = nullptr, + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) : number_of_vertices_p(number_of_vertices), number_of_edges_p(number_of_edges), src_indices_p(sizeof(vertex_t) * number_of_edges, stream, mr), @@ -361,8 +362,8 @@ class GraphCOO { } GraphCOO(GraphCOOView const& graph, - cudaStream_t stream = nullptr, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + cudaStream_t stream = nullptr, + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) : number_of_vertices_p(graph.number_of_vertices), number_of_edges_p(graph.number_of_edges), src_indices_p(graph.src_indices, graph.number_of_edges * sizeof(vertex_t), stream, mr), @@ -457,7 +458,7 @@ class GraphCompressedSparseBase { edge_t number_of_edges, bool has_data, cudaStream_t stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) : number_of_vertices_p(number_of_vertices), number_of_edges_p(number_of_edges), offsets_p(sizeof(edge_t) * (number_of_vertices + 1), stream, mr), @@ -525,9 +526,9 @@ class GraphCSR : public GraphCompressedSparseBase { */ GraphCSR(vertex_t number_of_vertices_, edge_t number_of_edges_, - bool has_data_ = false, - cudaStream_t stream = nullptr, - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) + bool has_data_ = false, + cudaStream_t stream = nullptr, + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) : GraphCompressedSparseBase( number_of_vertices_, number_of_edges_, has_data_, stream, mr) { diff --git a/cpp/libcugraph_etl/include/hash/concurrent_unordered_map.cuh b/cpp/libcugraph_etl/include/hash/concurrent_unordered_map.cuh index e71bc2b3548..0e89dbfb9c1 100644 --- a/cpp/libcugraph_etl/include/hash/concurrent_unordered_map.cuh +++ b/cpp/libcugraph_etl/include/hash/concurrent_unordered_map.cuh @@ -21,19 +21,19 @@ */ #pragma once -#include #include -#include -#include +#include #include 
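Because the legacy graph constructors above now take the resource by `device_async_resource_ref`, call sites that pass a raw resource pointer keep compiling. A hedged call-site sketch (the pool size and the exact `cugraph::legacy::GraphCOO` spelling are illustrative assumptions, not taken from this PR):

```cpp
#include <cugraph/legacy/graph.hpp>

#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>

#include <cstdint>

void build_coo_with_pool(int32_t num_vertices, int32_t num_edges)
{
  rmm::mr::cuda_memory_resource upstream;
  rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> pool{&upstream, 1u << 28};

  // `&pool` (a device_memory_resource*) converts implicitly to
  // rmm::device_async_resource_ref, so the constructor call is unchanged.
  cugraph::legacy::GraphCOO<int32_t, int32_t, float> coo(
    num_vertices, num_edges, /*has_data=*/false, /*stream=*/nullptr, &pool);
}
```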
+#include +#include #include -#include #include #include +#include #include #include #include @@ -78,8 +78,8 @@ template constexpr bool is_packable() { - return std::is_integral::value and std::is_integral::value and - not std::is_void>::value and + return std::is_integral_v and std::is_integral_v and + not std::is_void_v> and std::has_unique_object_representations_v; } @@ -95,8 +95,8 @@ union pair_packer; template union pair_packer()>> { using packed_type = packed_t; - packed_type const packed; - pair_type const pair; + packed_type packed; + pair_type pair; __device__ pair_packer(pair_type _pair) : pair{_pair} {} @@ -120,7 +120,7 @@ template , typename Equality = equal_to, - typename Allocator = default_allocator>> + typename Allocator = rmm::mr::polymorphic_allocator>> class concurrent_unordered_map { public: using size_type = size_t; @@ -131,7 +131,7 @@ class concurrent_unordered_map { using mapped_type = Element; using value_type = thrust::pair; using iterator = cycle_iterator_adapter; - using const_iterator = const cycle_iterator_adapter; + using const_iterator = cycle_iterator_adapter const; public: /** @@ -163,12 +163,12 @@ class concurrent_unordered_map { * storage */ static auto create(size_type capacity, - rmm::cuda_stream_view stream = rmm::cuda_stream_default, - const mapped_type unused_element = std::numeric_limits::max(), - const key_type unused_key = std::numeric_limits::max(), - const Hasher& hash_function = hasher(), - const Equality& equal = key_equal(), - const allocator_type& allocator = allocator_type()) + rmm::cuda_stream_view stream, + mapped_type const unused_element = std::numeric_limits::max(), + key_type const unused_key = std::numeric_limits::max(), + Hasher const& hash_function = hasher(), + Equality const& equal = key_equal(), + allocator_type const& allocator = allocator_type()) { using Self = concurrent_unordered_map; @@ -246,7 +246,7 @@ class concurrent_unordered_map { __host__ __device__ mapped_type get_unused_element() const { return m_unused_element; } - __host__ __device__ size_type capacity() const { return m_capacity; } + [[nodiscard]] __host__ __device__ size_type capacity() const { return m_capacity; } private: /** @@ -271,16 +271,21 @@ class concurrent_unordered_map { __device__ std::enable_if_t(), insert_result> attempt_insert( value_type* const __restrict__ insert_location, value_type const& insert_pair) { - pair_packer const unused{thrust::make_pair(m_unused_key, m_unused_element)}; - pair_packer const new_pair{insert_pair}; - pair_packer const old{ - atomicCAS(reinterpret_cast::packed_type*>(insert_location), - unused.packed, - new_pair.packed)}; + pair_packer expected{thrust::make_pair(m_unused_key, m_unused_element)}; + pair_packer desired{insert_pair}; - if (old.packed == unused.packed) { return insert_result::SUCCESS; } + using packed_type = typename pair_packer::packed_type; - if (m_equal(old.pair.first, insert_pair.first)) { return insert_result::DUPLICATE; } + auto* insert_ptr = reinterpret_cast(insert_location); + cuda::atomic_ref ref{*insert_ptr}; + auto const success = + ref.compare_exchange_strong(expected.packed, desired.packed, cuda::std::memory_order_relaxed); + + if (success) { + return insert_result::SUCCESS; + } else if (m_equal(expected.pair.first, insert_pair.first)) { + return insert_result::DUPLICATE; + } return insert_result::CONTINUE; } @@ -295,16 +300,20 @@ class concurrent_unordered_map { __device__ std::enable_if_t(), insert_result> attempt_insert( value_type* const __restrict__ insert_location, value_type const& 
insert_pair) { - key_type const old_key{atomicCAS(&(insert_location->first), m_unused_key, insert_pair.first)}; + auto expected = m_unused_key; + cuda::atomic_ref ref{insert_location->first}; + auto const key_success = + ref.compare_exchange_strong(expected, insert_pair.first, cuda::std::memory_order_relaxed); // Hash bucket empty - if (m_unused_key == old_key) { + if (key_success) { insert_location->second = insert_pair.second; return insert_result::SUCCESS; } - // Key already exists - if (m_equal(old_key, insert_pair.first)) { return insert_result::DUPLICATE; } + else if (m_equal(expected, insert_pair.first)) { + return insert_result::DUPLICATE; + } return insert_result::CONTINUE; } @@ -330,7 +339,7 @@ class concurrent_unordered_map { */ __device__ thrust::pair insert(value_type const& insert_pair) { - const size_type key_hash{m_hf(insert_pair.first)}; + size_type const key_hash{m_hf(insert_pair.first)}; size_type index{key_hash % m_capacity}; insert_result status{insert_result::CONTINUE}; @@ -343,7 +352,7 @@ class concurrent_unordered_map { index = (index + 1) % m_capacity; } - bool const insert_success = (status == insert_result::SUCCESS) ? true : false; + bool const insert_success = status == insert_result::SUCCESS; return thrust::make_pair( iterator(m_hashtbl_values, m_hashtbl_values + m_capacity, current_bucket), insert_success); @@ -424,8 +433,7 @@ class concurrent_unordered_map { } } - void assign_async(const concurrent_unordered_map& other, - rmm::cuda_stream_view stream = rmm::cuda_stream_default) + void assign_async(concurrent_unordered_map const& other, rmm::cuda_stream_view stream) { if (other.m_capacity <= m_capacity) { m_capacity = other.m_capacity; @@ -443,7 +451,7 @@ class concurrent_unordered_map { stream.value())); } - void clear_async(rmm::cuda_stream_view stream = rmm::cuda_stream_default) + void clear_async(rmm::cuda_stream_view stream) { constexpr int block_size = 128; init_hashtbl<<<((m_capacity - 1) / block_size) + 1, block_size, 0, stream.value()>>>( @@ -458,7 +466,7 @@ class concurrent_unordered_map { } } - void prefetch(const int dev_id, rmm::cuda_stream_view stream = rmm::cuda_stream_default) + void prefetch(int const dev_id, rmm::cuda_stream_view stream) { cudaPointerAttributes hashtbl_values_ptr_attributes; cudaError_t status = cudaPointerGetAttributes(&hashtbl_values_ptr_attributes, m_hashtbl_values); @@ -478,7 +486,7 @@ class concurrent_unordered_map { * * @param stream CUDA stream used for device memory operations and kernel launches. */ - void destroy(rmm::cuda_stream_view stream = rmm::cuda_stream_default) + void destroy(rmm::cuda_stream_view stream) { m_allocator.deallocate(m_hashtbl_values, m_capacity, stream); delete this; @@ -514,12 +522,12 @@ class concurrent_unordered_map { * @param stream CUDA stream used for device memory operations and kernel launches. 
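The most substantive change inside the map is replacing raw `atomicCAS` with `cuda::atomic_ref::compare_exchange_strong`, which hands back the previously stored value through `expected`, so the duplicate-key check falls out naturally. A self-contained sketch of the packed-pair idiom, assuming a 32-bit key and 32-bit value (the names are illustrative, not the map's actual members):

```cpp
#include <cuda/atomic>

#include <cstdint>

struct alignas(8) pair32 {  // 8-byte alignment so the pair can be CAS'd as one 64-bit word
  std::uint32_t key;
  std::uint32_t value;
};

union packer {
  pair32 pair;
  std::uint64_t packed;
};

enum class slot_result { success, duplicate, keep_probing };

__device__ slot_result try_claim_slot(pair32* slot, pair32 new_pair, pair32 sentinel)
{
  packer expected{sentinel};  // what an empty slot looks like
  packer desired{new_pair};

  cuda::atomic_ref<std::uint64_t, cuda::thread_scope_device> ref{
    *reinterpret_cast<std::uint64_t*>(slot)};

  // One 64-bit CAS publishes key and value together, so no thread can observe
  // the new key paired with the sentinel value.
  if (ref.compare_exchange_strong(
        expected.packed, desired.packed, cuda::std::memory_order_relaxed)) {
    return slot_result::success;
  }
  // On failure, `expected` now holds whatever is currently stored in the slot.
  if (expected.pair.key == new_pair.key) { return slot_result::duplicate; }
  return slot_result::keep_probing;  // occupied by a different key; probe the next bucket
}
```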
*/ concurrent_unordered_map(size_type capacity, - const mapped_type unused_element, - const key_type unused_key, - const Hasher& hash_function, - const Equality& equal, - const allocator_type& allocator, - rmm::cuda_stream_view stream = rmm::cuda_stream_default) + mapped_type const unused_element, + key_type const unused_key, + Hasher const& hash_function, + Equality const& equal, + allocator_type const& allocator, + rmm::cuda_stream_view stream) : m_hf(hash_function), m_equal(equal), m_allocator(allocator), @@ -542,8 +550,11 @@ class concurrent_unordered_map { } } - init_hashtbl<<<((m_capacity - 1) / block_size) + 1, block_size, 0, stream.value()>>>( - m_hashtbl_values, m_capacity, m_unused_key, m_unused_element); - RAFT_CUDA_TRY(cudaGetLastError()); + if (m_capacity > 0) { + init_hashtbl<<<((m_capacity - 1) / block_size) + 1, block_size, 0, stream.value()>>>( + m_hashtbl_values, m_capacity, m_unused_key, m_unused_element); + } + + CUDF_CHECK_CUDA(stream.value()); } }; diff --git a/cpp/libcugraph_etl/include/hash/hash_allocator.cuh b/cpp/libcugraph_etl/include/hash/hash_allocator.cuh deleted file mode 100644 index a4c8c17e01b..00000000000 --- a/cpp/libcugraph_etl/include/hash/hash_allocator.cuh +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (c) 2017-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
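Because the defaulted `rmm::cuda_stream_default` arguments were dropped above, host code must now choose the stream explicitly when it creates, clears, or destroys a map. A hedged host-side sketch, assuming the include path under `cpp/libcugraph_etl/include` and that `create()` still returns an owning smart pointer whose deleter calls `destroy()`, as in the libcudf version this map was copied from:

```cpp
#include <hash/concurrent_unordered_map.cuh>

#include <rmm/cuda_stream_view.hpp>

#include <cstdint>

void build_map(rmm::cuda_stream_view stream)
{
  using map_type = concurrent_unordered_map<std::int32_t, std::int32_t>;

  // The stream is now a required argument: the table allocation and the
  // init_hashtbl kernel launch are ordered on the stream the caller provides.
  auto map = map_type::create(/*capacity=*/1 << 20, stream);

  // ... launch kernels on `stream` that call map->insert(...) ...

  // Reset every bucket back to the unused sentinel, again on an explicit stream.
  map->clear_async(stream);
}  // the owning handle's deleter calls destroy() and frees the table
```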
- */ - -#ifndef HASH_ALLOCATOR_CUH -#define HASH_ALLOCATOR_CUH - -#include -#include -#include -#include - -#include - -template -struct managed_allocator { - typedef T value_type; - rmm::mr::device_memory_resource* mr = new rmm::mr::managed_memory_resource; - - managed_allocator() = default; - - template - constexpr managed_allocator(const managed_allocator&) noexcept - { - } - - T* allocate(std::size_t n, rmm::cuda_stream_view stream = rmm::cuda_stream_default) const - { - return static_cast(mr->allocate(n * sizeof(T), stream)); - } - - void deallocate(T* p, - std::size_t n, - rmm::cuda_stream_view stream = rmm::cuda_stream_default) const - { - mr->deallocate(p, n * sizeof(T), stream); - } -}; - -template -bool operator==(const managed_allocator&, const managed_allocator&) -{ - return true; -} -template -bool operator!=(const managed_allocator&, const managed_allocator&) -{ - return false; -} - -template -struct default_allocator { - typedef T value_type; - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(); - - default_allocator() = default; - - template - constexpr default_allocator(const default_allocator&) noexcept - { - } - - T* allocate(std::size_t n, rmm::cuda_stream_view stream = rmm::cuda_stream_default) const - { - return static_cast(mr->allocate(n * sizeof(T), stream)); - } - - void deallocate(T* p, - std::size_t n, - rmm::cuda_stream_view stream = rmm::cuda_stream_default) const - { - mr->deallocate(p, n * sizeof(T), stream); - } -}; - -template -bool operator==(const default_allocator&, const default_allocator&) -{ - return true; -} -template -bool operator!=(const default_allocator&, const default_allocator&) -{ - return false; -} - -#endif diff --git a/cpp/libcugraph_etl/include/hash/helper_functions.cuh b/cpp/libcugraph_etl/include/hash/helper_functions.cuh index 183516ae782..67b24b4937e 100644 --- a/cpp/libcugraph_etl/include/hash/helper_functions.cuh +++ b/cpp/libcugraph_etl/include/hash/helper_functions.cuh @@ -21,6 +21,8 @@ #include +#include + constexpr int64_t DEFAULT_HASH_TABLE_OCCUPANCY = 50; /** diff --git a/cpp/src/converters/legacy/COOtoCSR.cu b/cpp/src/converters/legacy/COOtoCSR.cu index 5e8e1604586..545016bc5ce 100644 --- a/cpp/src/converters/legacy/COOtoCSR.cu +++ b/cpp/src/converters/legacy/COOtoCSR.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
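The deleted `default_allocator` above existed only to pair a raw `device_memory_resource*` with stream-ordered allocate/deallocate calls; `rmm::mr::polymorphic_allocator<T>` offers the same interface (default-constructing from the current device resource) while keeping the resource handling inside RMM, which is what removes the last `device_memory_resource*` from this header. A small sketch of equivalent usage (illustrative only):

```cpp
#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/device/polymorphic_allocator.hpp>

#include <thrust/pair.h>

#include <cstddef>

void allocate_buckets(std::size_t capacity, rmm::cuda_stream_view stream)
{
  using value_type = thrust::pair<int, int>;

  // Default-constructed: uses the current device resource, like the old
  // default_allocator, but without cuGraph holding the raw pointer itself.
  rmm::mr::polymorphic_allocator<value_type> alloc{};

  value_type* buckets = alloc.allocate(capacity, stream);  // stream-ordered
  // ... initialize and use the buckets ...
  alloc.deallocate(buckets, capacity, stream);
}
```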
@@ -16,37 +16,39 @@ #include "COOtoCSR.cuh" +#include + namespace cugraph { // Explicit instantiation for uint32_t + float template std::unique_ptr> coo_to_csr(legacy::GraphCOOView const& graph, - rmm::mr::device_memory_resource*); + rmm::device_async_resource_ref); // Explicit instantiation for uint32_t + double template std::unique_ptr> coo_to_csr( - legacy::GraphCOOView const& graph, rmm::mr::device_memory_resource*); + legacy::GraphCOOView const& graph, rmm::device_async_resource_ref); // Explicit instantiation for int + float template std::unique_ptr> coo_to_csr(legacy::GraphCOOView const& graph, - rmm::mr::device_memory_resource*); + rmm::device_async_resource_ref); // Explicit instantiation for int + double template std::unique_ptr> coo_to_csr(legacy::GraphCOOView const& graph, - rmm::mr::device_memory_resource*); + rmm::device_async_resource_ref); // Explicit instantiation for int64_t + float template std::unique_ptr> coo_to_csr(legacy::GraphCOOView const& graph, - rmm::mr::device_memory_resource*); + rmm::device_async_resource_ref); // Explicit instantiation for int64_t + double template std::unique_ptr> coo_to_csr(legacy::GraphCOOView const& graph, - rmm::mr::device_memory_resource*); + rmm::device_async_resource_ref); // in-place versions: // diff --git a/cpp/src/converters/legacy/COOtoCSR.cuh b/cpp/src/converters/legacy/COOtoCSR.cuh index d2fc9fb2cc2..feae22b53a9 100644 --- a/cpp/src/converters/legacy/COOtoCSR.cuh +++ b/cpp/src/converters/legacy/COOtoCSR.cuh @@ -27,6 +27,7 @@ #include #include +#include #include #include @@ -133,7 +134,7 @@ rmm::device_buffer create_offset(VT* source, VT number_of_vertices, ET number_of_edges, rmm::cuda_stream_view stream_view, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { // Offset array needs an extra element at the end to contain the ending offsets // of the last vertex @@ -149,7 +150,7 @@ rmm::device_buffer create_offset(VT* source, template std::unique_ptr> coo_to_csr( - legacy::GraphCOOView const& graph, rmm::mr::device_memory_resource* mr) + legacy::GraphCOOView const& graph, rmm::device_async_resource_ref mr) { rmm::cuda_stream_view stream_view; @@ -195,32 +196,32 @@ void coo_to_csr_inplace(legacy::GraphCOOView& graph, // EIDecl for uint32_t + float extern template std::unique_ptr> coo_to_csr(legacy::GraphCOOView const& graph, - rmm::mr::device_memory_resource*); + rmm::device_async_resource_ref); // EIDecl for uint32_t + double extern template std::unique_ptr> coo_to_csr( - legacy::GraphCOOView const& graph, rmm::mr::device_memory_resource*); + legacy::GraphCOOView const& graph, rmm::device_async_resource_ref); // EIDecl for int + float extern template std::unique_ptr> coo_to_csr(legacy::GraphCOOView const& graph, - rmm::mr::device_memory_resource*); + rmm::device_async_resource_ref); // EIDecl for int + double extern template std::unique_ptr> coo_to_csr(legacy::GraphCOOView const& graph, - rmm::mr::device_memory_resource*); + rmm::device_async_resource_ref); // EIDecl for int64_t + float extern template std::unique_ptr> coo_to_csr(legacy::GraphCOOView const& graph, - rmm::mr::device_memory_resource*); + rmm::device_async_resource_ref); // EIDecl for int64_t + double extern template std::unique_ptr> coo_to_csr(legacy::GraphCOOView const& graph, - rmm::mr::device_memory_resource*); + rmm::device_async_resource_ref); // in-place versions: // diff --git a/cpp/src/tree/legacy/mst.cu b/cpp/src/tree/legacy/mst.cu index a7fdd45c4eb..a10d75680fd 100644 --- a/cpp/src/tree/legacy/mst.cu +++ 
b/cpp/src/tree/legacy/mst.cu @@ -21,6 +21,7 @@ #include #include +#include #include @@ -36,7 +37,7 @@ template std::unique_ptr> mst_impl( raft::handle_t const& handle, legacy::GraphCSRView const& graph, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { auto stream = handle.get_stream(); @@ -66,7 +67,7 @@ template std::unique_ptr> minimum_spanning_tree( raft::handle_t const& handle, legacy::GraphCSRView const& graph, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return detail::mst_impl(handle, graph, mr); } @@ -74,9 +75,9 @@ std::unique_ptr> minimum_spanning_t template std::unique_ptr> minimum_spanning_tree( raft::handle_t const& handle, legacy::GraphCSRView const& graph, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); template std::unique_ptr> minimum_spanning_tree(raft::handle_t const& handle, legacy::GraphCSRView const& graph, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); } // namespace cugraph diff --git a/cpp/tests/utilities/base_fixture.hpp b/cpp/tests/utilities/base_fixture.hpp index 5c984d7979b..cb302674a25 100644 --- a/cpp/tests/utilities/base_fixture.hpp +++ b/cpp/tests/utilities/base_fixture.hpp @@ -29,6 +29,7 @@ #include #include #include +#include #include @@ -52,14 +53,14 @@ namespace test { * ``` **/ class BaseFixture : public ::testing::Test { - rmm::mr::device_memory_resource* _mr{rmm::mr::get_current_device_resource()}; + rmm::device_async_resource_ref _mr{rmm::mr::get_current_device_resource()}; public: /** * @brief Returns pointer to `device_memory_resource` that should be used for all tests inheriting *from this fixture **/ - rmm::mr::device_memory_resource* mr() { return _mr; } + rmm::device_async_resource_ref mr() { return _mr; } }; /// MR factory functions
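Finally, since `mr()` in the test fixture now returns a `device_async_resource_ref` by value rather than a raw pointer, tests can pass it straight into RMM containers. A hedged sketch of a derived test, assuming the fixture lives in `cugraph::test` as the namespace in the diff suggests (the test name and include form are illustrative):

```cpp
#include "utilities/base_fixture.hpp"

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>

#include <gtest/gtest.h>

#include <cstddef>

class ResourceRefTest : public cugraph::test::BaseFixture {};

TEST_F(ResourceRefTest, AllocatesThroughFixtureResource)
{
  rmm::cuda_stream_view stream{};  // default stream is enough for this sketch

  // mr() returns a cheap-to-copy resource ref; device_uvector accepts it directly.
  rmm::device_uvector<int> values(1024, stream, mr());
  EXPECT_EQ(values.size(), std::size_t{1024});
}
```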