diff --git a/cpp/include/raft/linalg/init.h b/cpp/include/raft/linalg/init.h
index cb2e8ed1ab..9944685a1f 100644
--- a/cpp/include/raft/linalg/init.h
+++ b/cpp/include/raft/linalg/init.h
@@ -19,6 +19,7 @@
 #include <thrust/copy.h>
 #include <thrust/device_ptr.h>
 #include <thrust/iterator/counting_iterator.h>
+#include <rmm/exec_policy.hpp>
 
 namespace raft {
 namespace linalg {
@@ -40,7 +41,7 @@ void range(T *out, int start, int end, cudaStream_t stream) {
   thrust::counting_iterator<int> first(start);
   thrust::counting_iterator<int> last = first + (end - start);
   thrust::device_ptr<T> ptr(out);
-  thrust::copy(thrust::cuda::par.on(stream), first, last, ptr);
+  thrust::copy(rmm::exec_policy(stream), first, last, ptr);
 }
 
 /**
diff --git a/cpp/include/raft/linalg/transpose.h b/cpp/include/raft/linalg/transpose.h
index d90f6271fa..db1cabd694 100644
--- a/cpp/include/raft/linalg/transpose.h
+++ b/cpp/include/raft/linalg/transpose.h
@@ -17,8 +17,8 @@
 #pragma once
 
 #include <raft/linalg/cublas_wrappers.h>
-#include <thrust/device_vector.h>
 #include <raft/handle.hpp>
+#include <rmm/exec_policy.hpp>
 
 namespace raft {
 namespace linalg {
@@ -60,7 +60,7 @@ void transpose(math_t *inout, int n, cudaStream_t stream) {
   auto d_inout = inout;
   auto counting = thrust::make_counting_iterator<int>(0);
 
-  thrust::for_each(thrust::cuda::par.on(stream), counting, counting + size,
+  thrust::for_each(rmm::exec_policy(stream), counting, counting + size,
                    [=] __device__(int idx) {
                      int s_row = idx % m;
                      int s_col = idx / m;
diff --git a/cpp/include/raft/matrix/matrix.cuh b/cpp/include/raft/matrix/matrix.cuh
index 5f5755e24e..688b92da09 100644
--- a/cpp/include/raft/matrix/matrix.cuh
+++ b/cpp/include/raft/matrix/matrix.cuh
@@ -20,13 +20,13 @@
 #include <cusolverDn.h>
 #include <raft/cudart_utils.h>
 #include <raft/linalg/cublas_wrappers.h>
-#include <thrust/device_vector.h>
 #include <thrust/execution_policy.h>
 #include <algorithm>
 #include <cstddef>
 #include <raft/cache/cache_util.cuh>
 #include <raft/cuda_utils.cuh>
 #include <raft/handle.hpp>
+#include <rmm/exec_policy.hpp>
 
 namespace raft {
 namespace matrix {
@@ -64,7 +64,7 @@ void copyRows(const m_t *in, idx_t n_rows, idx_t n_cols, m_t *out,
   idx_t size = n_rows_indices * n_cols;
   auto counting = thrust::make_counting_iterator<idx_t>(0);
 
-  thrust::for_each(thrust::cuda::par.on(stream), counting, counting + size,
+  thrust::for_each(rmm::exec_policy(stream), counting, counting + size,
                    [=] __device__(idx_t idx) {
                      idx_t row = idx % n_rows_indices;
                      idx_t col = idx / n_rows_indices;
@@ -108,7 +108,7 @@ void truncZeroOrigin(m_t *in, idx_t in_n_rows, m_t *out, idx_t out_n_rows,
   auto d_q_trunc = out;
   auto counting = thrust::make_counting_iterator<idx_t>(0);
 
-  thrust::for_each(thrust::cuda::par.on(stream), counting, counting + size,
+  thrust::for_each(rmm::exec_policy(stream), counting, counting + size,
                    [=] __device__(idx_t idx) {
                      idx_t row = idx % m;
                      idx_t col = idx / m;
@@ -133,8 +133,8 @@ void colReverse(m_t *inout, idx_t n_rows, idx_t n_cols, cudaStream_t stream) {
   auto d_q_reversed = inout;
   auto counting = thrust::make_counting_iterator<idx_t>(0);
 
-  thrust::for_each(thrust::cuda::par.on(stream), counting,
-                   counting + (size / 2), [=] __device__(idx_t idx) {
+  thrust::for_each(rmm::exec_policy(stream), counting, counting + (size / 2),
+                   [=] __device__(idx_t idx) {
                      idx_t dest_row = idx % m;
                      idx_t dest_col = idx / m;
                      idx_t src_row = dest_row;
@@ -161,8 +161,8 @@ void rowReverse(m_t *inout, idx_t n_rows, idx_t n_cols, cudaStream_t stream) {
   auto d_q_reversed = inout;
   auto counting = thrust::make_counting_iterator<idx_t>(0);
 
-  thrust::for_each(thrust::cuda::par.on(stream), counting,
-                   counting + (size / 2), [=] __device__(idx_t idx) {
+  thrust::for_each(rmm::exec_policy(stream), counting, counting + (size / 2),
+                   [=] __device__(idx_t idx) {
                      idx_t dest_row = idx % m;
                      idx_t dest_col = idx / m;
                      idx_t src_row = (m - dest_row) - 1;
diff --git a/cpp/include/raft/sparse/convert/csr.cuh b/cpp/include/raft/sparse/convert/csr.cuh
index 16f351bf48..79b18ebd0a 100644
--- a/cpp/include/raft/sparse/convert/csr.cuh
+++ b/cpp/include/raft/sparse/convert/csr.cuh
@@ -160,7 +160,7 @@ void sorted_coo_to_csr(const T *rows, int nnz, T *row_ind, int m,
   thrust::device_ptr<T> row_counts_d =
     thrust::device_pointer_cast(row_counts.data());
   thrust::device_ptr<T> c_ind_d = thrust::device_pointer_cast(row_ind);
-  exclusive_scan(thrust::cuda::par.on(stream), row_counts_d, row_counts_d + m,
+  exclusive_scan(rmm::exec_policy(stream), row_counts_d, row_counts_d + m,
                  c_ind_d);
 }
 
diff --git a/cpp/include/raft/sparse/coo.cuh b/cpp/include/raft/sparse/coo.cuh
index 6af8eae395..fa21614f8f 100644
--- a/cpp/include/raft/sparse/coo.cuh
+++ b/cpp/include/raft/sparse/coo.cuh
@@ -22,7 +22,6 @@
 #include <cusparse_v2.h>
 
 #include <thrust/device_ptr.h>
-#include <thrust/device_vector.h>
 #include <thrust/scan.h>
 
 #include <cuda_runtime.h>
diff --git a/cpp/include/raft/sparse/distance/coo_spmv_strategies/base_strategy.cuh b/cpp/include/raft/sparse/distance/coo_spmv_strategies/base_strategy.cuh
index 194799aed0..3b57225350 100644
--- a/cpp/include/raft/sparse/distance/coo_spmv_strategies/base_strategy.cuh
+++ b/cpp/include/raft/sparse/distance/coo_spmv_strategies/base_strategy.cuh
@@ -22,7 +22,6 @@
 #include "coo_mask_row_iterators.cuh"
 
 #include <rmm/device_uvector.hpp>
-#include <rmm/device_vector.hpp>
 
 namespace raft {
 namespace sparse {
diff --git a/cpp/include/raft/sparse/distance/utils.cuh b/cpp/include/raft/sparse/distance/utils.cuh
index 6b6d77a2d5..3bee1bc87d 100644
--- a/cpp/include/raft/sparse/distance/utils.cuh
+++ b/cpp/include/raft/sparse/distance/utils.cuh
@@ -21,8 +21,6 @@
 
 #include <cub/cub.cuh>
 
-#include <rmm/device_vector.hpp>
-
 namespace raft {
 namespace sparse {
 namespace distance {
diff --git a/cpp/include/raft/sparse/linalg/add.cuh b/cpp/include/raft/sparse/linalg/add.cuh
index 3bf028d14a..7ed627b9e2 100644
--- a/cpp/include/raft/sparse/linalg/add.cuh
+++ b/cpp/include/raft/sparse/linalg/add.cuh
@@ -22,6 +22,7 @@
 #include <raft/sparse/cusparse_wrappers.h>
 #include <raft/cuda_utils.cuh>
 #include <rmm/device_uvector.hpp>
+#include <rmm/exec_policy.hpp>
 
 #include <thrust/device_ptr.h>
 #include <thrust/scan.h>
@@ -181,7 +182,7 @@ size_t csr_add_calc_inds(const int *a_ind, const int *a_indptr, const T *a_val,
   thrust::device_ptr<int> row_counts_d =
     thrust::device_pointer_cast(row_counts.data());
   thrust::device_ptr<int> c_ind_d = thrust::device_pointer_cast(out_ind);
-  exclusive_scan(thrust::cuda::par.on(stream), row_counts_d, row_counts_d + m,
+  exclusive_scan(rmm::exec_policy(stream), row_counts_d, row_counts_d + m,
                  c_ind_d);
 
   return cnnz;
diff --git a/cpp/include/raft/sparse/linalg/symmetrize.cuh b/cpp/include/raft/sparse/linalg/symmetrize.cuh
index 614c9d830e..a6e1027288 100644
--- a/cpp/include/raft/sparse/linalg/symmetrize.cuh
+++ b/cpp/include/raft/sparse/linalg/symmetrize.cuh
@@ -22,6 +22,7 @@
 #include <raft/sparse/cusparse_wrappers.h>
 #include <raft/cuda_utils.cuh>
 #include <rmm/device_uvector.hpp>
+#include <rmm/exec_policy.hpp>
 
 #include <raft/sparse/op/sort.h>
 #include <thrust/device_ptr.h>
@@ -293,8 +294,8 @@ void from_knn_symmetrize_matrix(const value_idx *restrict knn_indices,
     thrust::device_pointer_cast(row_sizes.data());
 
   // Rolling cumulative sum
-  thrust::exclusive_scan(thrust::cuda::par.on(stream), __row_sizes,
-                         __row_sizes + n, __edges);
+  thrust::exclusive_scan(rmm::exec_policy(stream), __row_sizes, __row_sizes + n,
+                         __edges);
 
   // (5) Perform final data + data.T operation in tandem with memcpying
   symmetric_sum<<<numBlocks, threadsPerBlock, 0, stream>>>(
diff --git a/cpp/include/raft/sparse/mst/detail/mst_solver_inl.cuh b/cpp/include/raft/sparse/mst/detail/mst_solver_inl.cuh
index 029b76a945..33b980afcd 100644
--- a/cpp/include/raft/sparse/mst/detail/mst_solver_inl.cuh
+++ b/cpp/include/raft/sparse/mst/detail/mst_solver_inl.cuh
@@ -23,10 +23,10 @@
 #include "utils.cuh"
 
 #include <raft/cudart_utils.h>
-#include <rmm/device_buffer.hpp>
+#include <rmm/device_scalar.hpp>
+#include <rmm/device_uvector.hpp>
 
 #include <thrust/device_ptr.h>
-#include <thrust/device_vector.h>
 #include <thrust/execution_policy.h>
 #include <thrust/host_vector.h>
 #include <thrust/reduce.h>
@@ -34,10 +34,6 @@
 #include <thrust/transform.h>
 #include <iostream>
 
-#include <raft/cudart_utils.h>
-
-#include <rmm/device_buffer.hpp>
-
 namespace raft {
 namespace mst {
 typedef std::chrono::high_resolution_clock Clock;
@@ -63,20 +59,20 @@ MST_solver<vertex_t, edge_t, weight_t, alteration_t>::MST_solver(
     offsets(offsets_),
     indices(indices_),
     weights(weights_),
-    altered_weights(e_),
+    altered_weights(e_, stream_),
     v(v_),
     e(e_),
     color_index(color_),
-    color(v_),
-    next_color(v_),
-    min_edge_color(v_),
-    new_mst_edge(v_),
-    mst_edge(e_, false),
-    temp_src(2 * v_),
-    temp_dst(2 * v_),
-    temp_weights(2 * v_),
-    mst_edge_count(1, 0),
-    prev_mst_edge_count(1, 0),
+    color(v_, stream_),
+    next_color(v_, stream_),
+    min_edge_color(v_, stream_),
+    new_mst_edge(v_, stream_),
+    mst_edge(e_, stream_),
+    temp_src(2 * v_, stream_),
+    temp_dst(2 * v_, stream_),
+    temp_weights(2 * v_, stream_),
+    mst_edge_count(1, stream_),
+    prev_mst_edge_count(1, stream_),
     stream(stream_),
     symmetrize_output(symmetrize_output_),
     initialize_colors(initialize_colors_),
@@ -85,13 +81,18 @@ MST_solver<vertex_t, edge_t, weight_t, alteration_t>::MST_solver(
   max_threads = handle_.get_device_properties().maxThreadsPerBlock;
   sm_count = handle_.get_device_properties().multiProcessorCount;
 
+  mst_edge_count.set_value_to_zero_async(stream);
+  prev_mst_edge_count.set_value_to_zero_async(stream);
+  CUDA_CHECK(cudaMemsetAsync(mst_edge.data(), 0, mst_edge.size() * sizeof(bool),
+                             stream));
+
   //Initially, color holds the vertex id as color
   auto policy = handle.get_thrust_policy();
   if (initialize_colors_) {
     thrust::sequence(policy, color.begin(), color.end(), 0);
     thrust::sequence(policy, color_index, color_index + v, 0);
   } else {
-    raft::copy(color.data().get(), color_index, v, stream);
+    raft::copy(color.data(), color_index, v, stream);
   }
   thrust::sequence(policy, next_color.begin(), next_color.end(), 0);
 }
@@ -158,12 +159,12 @@ MST_solver<vertex_t, edge_t, weight_t, alteration_t>::solve() {
     timer3 += duration_us(stop - start);
 #endif
 
-    auto curr_mst_edge_count = mst_edge_count[0];
+    auto curr_mst_edge_count = mst_edge_count.value(stream);
     RAFT_EXPECTS(curr_mst_edge_count <= max_mst_edges,
                  "Number of edges found by MST is invalid. This may be due to "
                  "loss in precision. Try increasing precision of weights.");
 
-    if (curr_mst_edge_count == prev_mst_edge_count[0]) {
+    if (curr_mst_edge_count == prev_mst_edge_count.value(stream)) {
 #ifdef MST_TIME
       std::cout << "Iterations: " << i << std::endl;
       std::cout << timer0 << "," << timer1 << "," << timer2 << "," << timer3
@@ -194,12 +195,11 @@ MST_solver<vertex_t, edge_t, weight_t, alteration_t>::solve() {
 #endif
 
     // copy this iteration's results and store
-    prev_mst_edge_count = mst_edge_count;
+    prev_mst_edge_count.set_value_async(curr_mst_edge_count, stream);
   }
 
   // result packaging
-  thrust::host_vector<edge_t> host_mst_edge_count = mst_edge_count;
-  mst_result.n_edges = host_mst_edge_count[0];
+  mst_result.n_edges = mst_edge_count.value(stream);
   mst_result.src.resize(mst_result.n_edges, stream);
   mst_result.dst.resize(mst_result.n_edges, stream);
   mst_result.weights.resize(mst_result.n_edges, stream);
@@ -226,7 +226,7 @@ template <typename vertex_t, typename edge_t, typename weight_t,
 alteration_t
 MST_solver<vertex_t, edge_t, weight_t, alteration_t>::alteration_max() {
   auto policy = handle.get_thrust_policy();
-  rmm::device_vector<weight_t> tmp(e);
+  rmm::device_uvector<weight_t> tmp(e, stream);
   thrust::device_ptr<const weight_t> weights_ptr(weights);
   thrust::copy(policy, weights_ptr, weights_ptr + e, tmp.begin());
   //sort tmp weights
@@ -240,7 +240,7 @@ MST_solver<vertex_t, edge_t, weight_t, alteration_t>::alteration_max() {
     thrust::make_zip_iterator(thrust::make_tuple(tmp.begin(), tmp.begin() + 1));
   auto end =
     thrust::make_zip_iterator(thrust::make_tuple(new_end - 1, new_end));
-  auto init = tmp[1] - tmp[0];
+  auto init = tmp.element(1, stream) - tmp.element(0, stream);
   auto max =
     thrust::transform_reduce(policy, begin, end, alteration_functor<weight_t>(),
                              init, thrust::minimum<weight_t>());
@@ -259,7 +259,7 @@ void MST_solver<vertex_t, edge_t, weight_t, alteration_t>::alteration() {
   alteration_t max = alteration_max();
 
   // pool of rand values
-  rmm::device_vector<alteration_t> rand_values(v);
+  rmm::device_uvector<alteration_t> rand_values(v, stream);
 
   // Random number generator
   curandGenerator_t randGen;
@@ -267,8 +267,7 @@ void MST_solver<vertex_t, edge_t, weight_t, alteration_t>::alteration() {
   curandSetPseudoRandomGeneratorSeed(randGen, 1234567);
 
   // Initialize rand values
-  auto curand_status =
-    curand_generate_uniformX(randGen, rand_values.data().get(), v);
+  auto curand_status = curand_generate_uniformX(randGen, rand_values.data(), v);
   RAFT_EXPECTS(curand_status == CURAND_STATUS_SUCCESS, "MST: CURAND failed");
   curand_status = curandDestroyGenerator(randGen);
   RAFT_EXPECTS(curand_status == CURAND_STATUS_SUCCESS,
@@ -276,8 +275,8 @@ void MST_solver<vertex_t, edge_t, weight_t, alteration_t>::alteration() {
 
   //Alterate the weights, make all undirected edge weight unique while keeping Wuv == Wvu
   detail::alteration_kernel<<<nblocks, nthreads, 0, stream>>>(
-    v, e, offsets, indices, weights, max, rand_values.data().get(),
-    altered_weights.data().get());
+    v, e, offsets, indices, weights, max, rand_values.data(),
+    altered_weights.data());
 }
 
 // updates colors of vertices by propagating the lower color to the higher
@@ -286,23 +285,24 @@ template <typename vertex_t, typename edge_t, typename weight_t,
 void MST_solver<vertex_t, edge_t, weight_t, alteration_t>::label_prop(
   vertex_t* mst_src, vertex_t* mst_dst) {
   // update the colors of both ends its until there is no change in colors
-  thrust::host_vector<edge_t> curr_mst_edge_count = mst_edge_count;
+  edge_t curr_mst_edge_count = mst_edge_count.value(stream);
 
   auto min_pair_nthreads = std::min(v, (vertex_t)max_threads);
   auto min_pair_nblocks = std::min(
     (v + min_pair_nthreads - 1) / min_pair_nthreads, (vertex_t)max_blocks);
 
-  rmm::device_vector<bool> done(1, false);
-
-  edge_t* new_mst_edge_ptr = new_mst_edge.data().get();
-  vertex_t* color_ptr = color.data().get();
-  vertex_t* next_color_ptr = next_color.data().get();
+  edge_t* new_mst_edge_ptr = new_mst_edge.data();
+  vertex_t* color_ptr = color.data();
+  vertex_t* next_color_ptr = next_color.data();
 
-  bool* done_ptr = done.data().get();
+  rmm::device_scalar<bool> done(stream);
+  done.set_value_to_zero_async(stream);
+  bool* done_ptr = done.data();
+  const bool true_val = true;
 
   auto i = 0;
-  while (!done[0]) {
-    done[0] = true;
+  while (!done.value(stream)) {
+    done.set_value_async(true_val, stream);
 
     detail::min_pair_colors<<<min_pair_nblocks, min_pair_nthreads, 0, stream>>>(
       v, indices, new_mst_edge_ptr, color_ptr, color_index, next_color_ptr);
@@ -333,11 +333,11 @@ void MST_solver<vertex_t, edge_t, weight_t,
 
   int n_threads = 32;
 
-  vertex_t* color_ptr = color.data().get();
-  edge_t* new_mst_edge_ptr = new_mst_edge.data().get();
-  bool* mst_edge_ptr = mst_edge.data().get();
-  alteration_t* min_edge_color_ptr = min_edge_color.data().get();
-  alteration_t* altered_weights_ptr = altered_weights.data().get();
+  vertex_t* color_ptr = color.data();
+  edge_t* new_mst_edge_ptr = new_mst_edge.data();
+  bool* mst_edge_ptr = mst_edge.data();
+  alteration_t* min_edge_color_ptr = min_edge_color.data();
+  alteration_t* altered_weights_ptr = altered_weights.data();
 
   detail::kernel_min_edge_per_vertex<<<v, n_threads, 0, stream>>>(
     offsets, indices, altered_weights_ptr, color_ptr, color_index,
@@ -356,14 +356,14 @@ void MST_solver<vertex_t, edge_t, weight_t,
   thrust::fill(policy, temp_src.begin(), temp_src.end(),
                std::numeric_limits<vertex_t>::max());
 
-  vertex_t* color_ptr = color.data().get();
-  edge_t* new_mst_edge_ptr = new_mst_edge.data().get();
-  bool* mst_edge_ptr = mst_edge.data().get();
-  alteration_t* min_edge_color_ptr = min_edge_color.data().get();
-  alteration_t* altered_weights_ptr = altered_weights.data().get();
-  vertex_t* temp_src_ptr = temp_src.data().get();
-  vertex_t* temp_dst_ptr = temp_dst.data().get();
-  weight_t* temp_weights_ptr = temp_weights.data().get();
+  vertex_t* color_ptr = color.data();
+  edge_t* new_mst_edge_ptr = new_mst_edge.data();
+  bool* mst_edge_ptr = mst_edge.data();
+  alteration_t* min_edge_color_ptr = min_edge_color.data();
+  alteration_t* altered_weights_ptr = altered_weights.data();
+  vertex_t* temp_src_ptr = temp_src.data();
+  vertex_t* temp_dst_ptr = temp_dst.data();
+  weight_t* temp_weights_ptr = temp_weights.data();
 
   detail::min_edge_per_supervertex<<<nblocks, nthreads, 0, stream>>>(
     color_ptr, color_index, new_mst_edge_ptr, mst_edge_ptr, indices, weights,
@@ -388,8 +388,8 @@ void MST_solver<vertex_t, edge_t, weight_t, alteration_t>::check_termination() {
     std::min((2 * v + nthreads - 1) / nthreads, (vertex_t)max_blocks);
 
   // count number of new mst edges
-  edge_t* mst_edge_count_ptr = mst_edge_count.data().get();
-  vertex_t* temp_src_ptr = temp_src.data().get();
+  edge_t* mst_edge_count_ptr = mst_edge_count.data();
+  vertex_t* temp_src_ptr = temp_src.data();
 
   detail::kernel_count_new_mst_edges<<<nblocks, nthreads, 0, stream>>>(
     temp_src_ptr, mst_edge_count_ptr, 2 * v);
@@ -411,7 +411,7 @@ void MST_solver<vertex_t, edge_t, weight_t, alteration_t>::append_src_dst_pair(
   vertex_t* mst_src, vertex_t* mst_dst, weight_t* mst_weights) {
   auto policy = handle.get_thrust_policy();
 
-  auto curr_mst_edge_count = prev_mst_edge_count[0];
+  edge_t curr_mst_edge_count = prev_mst_edge_count.value(stream);
 
   // iterator to end of mst edges added to final output in previous iteration
   auto src_dst_zip_end = thrust::make_zip_iterator(thrust::make_tuple(
diff --git a/cpp/include/raft/sparse/mst/detail/utils.cuh b/cpp/include/raft/sparse/mst/detail/utils.cuh
index 8f755de459..4d5ca6ebe1 100644
--- a/cpp/include/raft/sparse/mst/detail/utils.cuh
+++ b/cpp/include/raft/sparse/mst/detail/utils.cuh
@@ -18,7 +18,7 @@
 #pragma once
 
 #include <iostream>
-#include <rmm/device_vector.hpp>
+#include <rmm/device_uvector.hpp>
 #define MST_TIME
 
 namespace raft {
@@ -32,7 +32,7 @@ __device__ idx_t get_1D_idx() {
 
 // somewhat smart vector print
 template <typename T>
-void printv(rmm::device_vector<T>& vec, const std::string& name = "",
+void printv(rmm::device_uvector<T>& vec, const std::string& name = "",
             const size_t displ = 5) {
 #ifdef MST_TIME
   std::cout.precision(15);
diff --git a/cpp/include/raft/sparse/mst/mst_solver.cuh b/cpp/include/raft/sparse/mst/mst_solver.cuh
index 833882ea0d..44b34ee5c7 100644
--- a/cpp/include/raft/sparse/mst/mst_solver.cuh
+++ b/cpp/include/raft/sparse/mst/mst_solver.cuh
@@ -18,8 +18,8 @@
 #pragma once
 
 #include <raft/handle.hpp>
+#include <rmm/device_scalar.hpp>
 #include <rmm/device_uvector.hpp>
-#include <rmm/device_vector.hpp>
 
 namespace raft {
 
@@ -68,24 +68,24 @@ class MST_solver {
   vertex_t sm_count;
 
   vertex_t* color_index;  // represent each supervertex as a color
-  rmm::device_vector<alteration_t>
+  rmm::device_uvector<alteration_t>
     min_edge_color;  // minimum incident edge weight per color
-  rmm::device_vector<edge_t> new_mst_edge;  // new minimum edge per vertex
-  rmm::device_vector<alteration_t>
+  rmm::device_uvector<edge_t> new_mst_edge;  // new minimum edge per vertex
+  rmm::device_uvector<alteration_t>
     altered_weights;  // weights to be used for mst
-  rmm::device_vector<edge_t>
+  rmm::device_scalar<edge_t>
     mst_edge_count;  // total number of edges added after every iteration
-  rmm::device_vector<edge_t>
+  rmm::device_scalar<edge_t>
     prev_mst_edge_count;  // total number of edges up to the previous iteration
-  rmm::device_vector<bool>
+  rmm::device_uvector<bool>
     mst_edge;  // mst output -  true if the edge belongs in mst
-  rmm::device_vector<vertex_t> next_color;  //  next iteration color
-  rmm::device_vector<vertex_t> color;  // index of color that vertex points to
+  rmm::device_uvector<vertex_t> next_color;  //  next iteration color
+  rmm::device_uvector<vertex_t> color;  // index of color that vertex points to
 
   // new src-dst pairs found per iteration
-  rmm::device_vector<vertex_t> temp_src;
-  rmm::device_vector<vertex_t> temp_dst;
-  rmm::device_vector<weight_t> temp_weights;
+  rmm::device_uvector<vertex_t> temp_src;
+  rmm::device_uvector<vertex_t> temp_dst;
+  rmm::device_uvector<weight_t> temp_weights;
 
   void label_prop(vertex_t* mst_src, vertex_t* mst_dst);
   void min_edge_per_vertex();
diff --git a/cpp/include/raft/sparse/op/sort.h b/cpp/include/raft/sparse/op/sort.h
index d53ceb62a9..9414a11ade 100644
--- a/cpp/include/raft/sparse/op/sort.h
+++ b/cpp/include/raft/sparse/op/sort.h
@@ -21,7 +21,7 @@
 #include <raft/cudart_utils.h>
 #include <raft/sparse/cusparse_wrappers.h>
 #include <raft/cuda_utils.cuh>
-#include <raft/mr/device/buffer.hpp>
+#include <rmm/exec_policy.hpp>
 
 #include <thrust/device_ptr.h>
 #include <thrust/scan.h>
@@ -69,8 +69,8 @@ void coo_sort(int m, int n, int nnz, int *rows, int *cols, T *vals,
   auto coo_indices = thrust::make_zip_iterator(thrust::make_tuple(rows, cols));
 
   // get all the colors in contiguous locations so we can map them to warps.
-  thrust::sort_by_key(thrust::cuda::par.on(stream), coo_indices,
-                      coo_indices + nnz, vals, TupleComp());
+  thrust::sort_by_key(rmm::exec_policy(stream), coo_indices, coo_indices + nnz,
+                      vals, TupleComp());
 }
 
 /**
@@ -104,8 +104,7 @@ void coo_sort_by_weight(value_idx *rows, value_idx *cols, value_t *data,
 
   auto first = thrust::make_zip_iterator(thrust::make_tuple(rows, cols));
 
-  thrust::sort_by_key(thrust::cuda::par.on(stream), t_data, t_data + nnz,
-                      first);
+  thrust::sort_by_key(rmm::exec_policy(stream), t_data, t_data + nnz, first);
 }
 };  // namespace op
 };  // end NAMESPACE sparse
diff --git a/cpp/include/raft/sparse/selection/connect_components.cuh b/cpp/include/raft/sparse/selection/connect_components.cuh
index 390522c9bc..46369ca964 100644
--- a/cpp/include/raft/sparse/selection/connect_components.cuh
+++ b/cpp/include/raft/sparse/selection/connect_components.cuh
@@ -30,6 +30,7 @@
 #include <thrust/device_ptr.h>
 #include <thrust/sort.h>
 #include <rmm/device_uvector.hpp>
+#include <rmm/exec_policy.hpp>
 
 #include <cub/cub.cuh>
 
@@ -211,7 +212,7 @@ void perform_1nn(cub::KeyValuePair<value_idx, value_t> *kvp,
     workspace.data(), reduction_op, reduction_op, true, true, stream);
 
   LookupColorOp<value_idx, value_t> extract_colors_op(colors);
-  thrust::transform(thrust::cuda::par.on(stream), kvp, kvp + n_rows, nn_colors,
+  thrust::transform(rmm::exec_policy(stream), kvp, kvp + n_rows, nn_colors,
                     extract_colors_op);
 }
 
@@ -232,15 +233,15 @@ void sort_by_color(value_idx *colors, value_idx *nn_colors,
                    cub::KeyValuePair<value_idx, value_t> *kvp,
                    value_idx *src_indices, size_t n_rows, cudaStream_t stream) {
   thrust::counting_iterator<value_idx> arg_sort_iter(0);
-  thrust::copy(thrust::cuda::par.on(stream), arg_sort_iter,
-               arg_sort_iter + n_rows, src_indices);
+  thrust::copy(rmm::exec_policy(stream), arg_sort_iter, arg_sort_iter + n_rows,
+               src_indices);
 
   auto keys = thrust::make_zip_iterator(thrust::make_tuple(
     colors, nn_colors, (raft::linkage::KeyValuePair<value_idx, value_t> *)kvp));
   auto vals = thrust::make_zip_iterator(thrust::make_tuple(src_indices));
 
   // get all the colors in contiguous locations so we can map them to warps.
-  thrust::sort_by_key(thrust::cuda::par.on(stream), keys, keys + n_rows, vals,
+  thrust::sort_by_key(rmm::exec_policy(stream), keys, keys + n_rows, vals,
                       TupleComp());
 }
 
diff --git a/cpp/include/raft/spatial/knn/detail/ann_quantized_faiss.cuh b/cpp/include/raft/spatial/knn/detail/ann_quantized_faiss.cuh
index c0345a01e6..43bdf12a38 100644
--- a/cpp/include/raft/spatial/knn/detail/ann_quantized_faiss.cuh
+++ b/cpp/include/raft/spatial/knn/detail/ann_quantized_faiss.cuh
@@ -39,7 +39,6 @@
 #include <faiss/gpu/utils/Select.cuh>
 #include <faiss/gpu/utils/Tensor.cuh>
 
-#include <thrust/device_vector.h>
 #include <thrust/iterator/transform_iterator.h>
 
 #include <raft/linalg/distance_type.h>
diff --git a/cpp/include/raft/spatial/knn/detail/knn_brute_force_faiss.cuh b/cpp/include/raft/spatial/knn/detail/knn_brute_force_faiss.cuh
index 6db8fb7a8e..84c130b0e4 100644
--- a/cpp/include/raft/spatial/knn/detail/knn_brute_force_faiss.cuh
+++ b/cpp/include/raft/spatial/knn/detail/knn_brute_force_faiss.cuh
@@ -29,7 +29,6 @@
 #include <faiss/gpu/utils/Select.cuh>
 
 #include <raft/linalg/distance_type.h>
-#include <thrust/device_vector.h>
 #include <thrust/iterator/transform_iterator.h>
 #include <iostream>
 #include <raft/handle.hpp>
diff --git a/cpp/include/raft/spectral/kmeans.hpp b/cpp/include/raft/spectral/kmeans.hpp
index 5928c727c6..b6f0105487 100644
--- a/cpp/include/raft/spectral/kmeans.hpp
+++ b/cpp/include/raft/spectral/kmeans.hpp
@@ -21,7 +21,6 @@
 
 #include <cuda.h>
 #include <thrust/binary_search.h>
-#include <thrust/device_vector.h>
 #include <thrust/gather.h>
 #include <thrust/iterator/constant_iterator.h>
 #include <thrust/random.h>
diff --git a/cpp/include/raft/spectral/modularity_maximization.hpp b/cpp/include/raft/spectral/modularity_maximization.hpp
index 1fe7819a7e..fededbfcb4 100644
--- a/cpp/include/raft/spectral/modularity_maximization.hpp
+++ b/cpp/include/raft/spectral/modularity_maximization.hpp
@@ -20,7 +20,6 @@
 #include <stdio.h>
 
 #include <cuda.h>
-#include <thrust/device_vector.h>
 #include <thrust/fill.h>
 #include <thrust/reduce.h>
 #include <thrust/transform.h>
diff --git a/cpp/include/raft/spectral/partition.hpp b/cpp/include/raft/spectral/partition.hpp
index a994895886..2df3812a4a 100644
--- a/cpp/include/raft/spectral/partition.hpp
+++ b/cpp/include/raft/spectral/partition.hpp
@@ -19,7 +19,6 @@
 #include <stdio.h>
 
 #include <cuda.h>
-#include <thrust/device_vector.h>
 #include <thrust/fill.h>
 #include <thrust/reduce.h>
 #include <thrust/transform.h>
diff --git a/cpp/include/raft/spectral/spectral_util.hpp b/cpp/include/raft/spectral/spectral_util.hpp
index de9ff1917f..c148350c0f 100644
--- a/cpp/include/raft/spectral/spectral_util.hpp
+++ b/cpp/include/raft/spectral/spectral_util.hpp
@@ -19,7 +19,6 @@
 #include <raft/cudart_utils.h>
 #include <raft/handle.hpp>
 
-#include <thrust/device_vector.h>
 #include <thrust/fill.h>
 #include <thrust/reduce.h>
 #include <thrust/transform.h>
diff --git a/cpp/test/eigen_solvers.cu b/cpp/test/eigen_solvers.cu
index 8025d8dcd6..ef67d95348 100644
--- a/cpp/test/eigen_solvers.cu
+++ b/cpp/test/eigen_solvers.cu
@@ -38,7 +38,6 @@ TEST(Raft, EigenSolvers) {
   index_type nnz = 0;
   index_type nrows = 0;
   auto stream = h.get_stream();
-  auto t_exe_pol = thrust::cuda::par.on(stream);
 
   sparse_matrix_t<index_type, value_type> sm1{h, ro, ci, vs, nrows, nnz};
   ASSERT_EQ(nullptr, sm1.row_offsets_);
diff --git a/cpp/test/linalg/reduce.cuh b/cpp/test/linalg/reduce.cuh
index 18261287cf..86cb4f32b4 100644
--- a/cpp/test/linalg/reduce.cuh
+++ b/cpp/test/linalg/reduce.cuh
@@ -16,9 +16,9 @@
 
 #include <cublas_v2.h>
 #include <raft/linalg/cublas_wrappers.h>
-#include <thrust/device_vector.h>
 #include <raft/cuda_utils.cuh>
 #include <raft/linalg/unary_op.cuh>
+#include <rmm/device_uvector.hpp>
 
 namespace raft {
 namespace linalg {
@@ -51,17 +51,20 @@ void unaryAndGemv(Type *dots, const Type *data, int D, int N,
                   cudaStream_t stream) {
   //computes a MLCommon unary op on data (squares it), then computes Ax
   //(A input matrix and x column vector) to sum columns
-  thrust::device_vector<Type> sq(D * N);
+  rmm::device_uvector<Type> sq(D * N, stream);
   raft::linalg::unaryOp(
     thrust::raw_pointer_cast(sq.data()), data, D * N,
     [] __device__(Type v) { return v * v; }, stream);
   cublasHandle_t handle;
   CUBLAS_CHECK(cublasCreate(&handle));
-  thrust::device_vector<Type> ones(N, 1);  //column vector [1...1]
+  rmm::device_uvector<Type> ones(N, stream);  //column vector [1...1]
+  raft::linalg::unaryOp<Type>(
+    ones.data(), ones.data(), ones.size(),
+    [=] __device__(Type input) { return 1; }, stream);
   Type alpha = 1, beta = 0;
-  CUBLAS_CHECK(raft::linalg::cublasgemv(
-    handle, CUBLAS_OP_N, D, N, &alpha, thrust::raw_pointer_cast(sq.data()), D,
-    thrust::raw_pointer_cast(ones.data()), 1, &beta, dots, 1, stream));
+  CUBLAS_CHECK(raft::linalg::cublasgemv(handle, CUBLAS_OP_N, D, N, &alpha,
+                                        sq.data(), D, ones.data(), 1, &beta,
+                                        dots, 1, stream));
   CUDA_CHECK(cudaDeviceSynchronize());
   CUBLAS_CHECK(cublasDestroy(handle));
 }
diff --git a/cpp/test/matrix/matrix.cu b/cpp/test/matrix/matrix.cu
index e7da92a136..2d2d9d2057 100644
--- a/cpp/test/matrix/matrix.cu
+++ b/cpp/test/matrix/matrix.cu
@@ -112,7 +112,7 @@ class MatrixCopyRowsTest : public ::testing::Test {
     // Init input array
     thrust::counting_iterator<idx_t> first(0);
     thrust::device_ptr<math_t> ptr(input.data());
-    thrust::copy(thrust::cuda::par.on(stream), first, first + n_cols * n_rows,
+    thrust::copy(handle.get_thrust_policy(), first, first + n_cols * n_rows,
                  ptr);
   }
 
diff --git a/cpp/test/mst.cu b/cpp/test/mst.cu
index d7aa76500b..4714fd5eaa 100644
--- a/cpp/test/mst.cu
+++ b/cpp/test/mst.cu
@@ -18,7 +18,7 @@
 
 #include <gtest/gtest.h>
 #include <iostream>
-#include <rmm/device_buffer.hpp>
+#include <rmm/device_uvector.hpp>
 #include <vector>
 
 #include <raft/cudart_utils.h>
@@ -127,11 +127,18 @@ class MSTTest
     v = static_cast<vertex_t>((csr_d.offsets.size() / sizeof(vertex_t)) - 1);
     e = static_cast<edge_t>(csr_d.indices.size() / sizeof(edge_t));
 
-    rmm::device_vector<vertex_t> mst_src(2 * v - 2,
-                                         std::numeric_limits<vertex_t>::max());
-    rmm::device_vector<vertex_t> mst_dst(2 * v - 2,
-                                         std::numeric_limits<vertex_t>::max());
-    rmm::device_vector<vertex_t> color(v, 0);
+    rmm::device_uvector<vertex_t> mst_src(2 * v - 2, handle.get_stream());
+    rmm::device_uvector<vertex_t> mst_dst(2 * v - 2, handle.get_stream());
+    rmm::device_uvector<vertex_t> color(v, handle.get_stream());
+
+    CUDA_CHECK(
+      cudaMemsetAsync(mst_src.data(), std::numeric_limits<vertex_t>::max(),
+                      mst_src.size() * sizeof(vertex_t), handle.get_stream()));
+    CUDA_CHECK(
+      cudaMemsetAsync(mst_dst.data(), std::numeric_limits<vertex_t>::max(),
+                      mst_dst.size() * sizeof(vertex_t), handle.get_stream()));
+    CUDA_CHECK(cudaMemsetAsync(color.data(), 0, color.size() * sizeof(vertex_t),
+                               handle.get_stream()));
 
     vertex_t *color_ptr = thrust::raw_pointer_cast(color.data());
 
@@ -214,7 +221,6 @@ class MSTTest
  protected:
   MSTTestInput<vertex_t, edge_t, weight_t> mst_input;
   CSRDevice<vertex_t, edge_t, weight_t> csr_d;
-  rmm::device_vector<bool> mst_edge;
   vertex_t v;
   edge_t e;
   int iterations;