Use rmm::exec_policy instead of thrust::cuda::par.on

rapidsai · Jul 1, 2021 · a3c9392 · a3c9392
1 parent eb1253a
commit a3c9392
Show file tree

Hide file tree

Showing 10 changed files with 27 additions and 23 deletions.
diff --git a/cpp/include/raft/linalg/init.h b/cpp/include/raft/linalg/init.h
@@ -19,6 +19,7 @@
 #include <thrust/copy.h>
 #include <thrust/device_ptr.h>
 #include <thrust/iterator/counting_iterator.h>
+#include <rmm/exec_policy.hpp>
 
 namespace raft {
 namespace linalg {
@@ -40,7 +41,7 @@ void range(T *out, int start, int end, cudaStream_t stream) {
   thrust::counting_iterator<int> first(start);
   thrust::counting_iterator<int> last = first + (end - start);
   thrust::device_ptr<T> ptr(out);
-  thrust::copy(thrust::cuda::par.on(stream), first, last, ptr);
+  thrust::copy(rmm::exec_policy(stream), first, last, ptr);
 }
 
 /**

diff --git a/cpp/include/raft/linalg/transpose.h b/cpp/include/raft/linalg/transpose.h
@@ -19,6 +19,7 @@
 #include <raft/linalg/cublas_wrappers.h>
 #include <thrust/device_vector.h>
 #include <raft/handle.hpp>
+#include <rmm/exec_policy.hpp>
 
 namespace raft {
 namespace linalg {
@@ -60,7 +61,7 @@ void transpose(math_t *inout, int n, cudaStream_t stream) {
   auto d_inout = inout;
   auto counting = thrust::make_counting_iterator<int>(0);
 
-  thrust::for_each(thrust::cuda::par.on(stream), counting, counting + size,
+  thrust::for_each(rmm::exec_policy(stream), counting, counting + size,
                    [=] __device__(int idx) {
                      int s_row = idx % m;
                      int s_col = idx / m;

diff --git a/cpp/include/raft/matrix/matrix.cuh b/cpp/include/raft/matrix/matrix.cuh
@@ -27,6 +27,7 @@
 #include <raft/cache/cache_util.cuh>
 #include <raft/cuda_utils.cuh>
 #include <raft/handle.hpp>
+#include <rmm/exec_policy.hpp>
 
 namespace raft {
 namespace matrix {
@@ -64,7 +65,7 @@ void copyRows(const m_t *in, idx_t n_rows, idx_t n_cols, m_t *out,
   idx_t size = n_rows_indices * n_cols;
   auto counting = thrust::make_counting_iterator<idx_t>(0);
 
-  thrust::for_each(thrust::cuda::par.on(stream), counting, counting + size,
+  thrust::for_each(rmm::exec_policy(stream), counting, counting + size,
                    [=] __device__(idx_t idx) {
                      idx_t row = idx % n_rows_indices;
                      idx_t col = idx / n_rows_indices;
@@ -108,7 +109,7 @@ void truncZeroOrigin(m_t *in, idx_t in_n_rows, m_t *out, idx_t out_n_rows,
   auto d_q_trunc = out;
   auto counting = thrust::make_counting_iterator<idx_t>(0);
 
-  thrust::for_each(thrust::cuda::par.on(stream), counting, counting + size,
+  thrust::for_each(rmm::exec_policy(stream), counting, counting + size,
                    [=] __device__(idx_t idx) {
                      idx_t row = idx % m;
                      idx_t col = idx / m;
@@ -133,8 +134,8 @@ void colReverse(m_t *inout, idx_t n_rows, idx_t n_cols, cudaStream_t stream) {
   auto d_q_reversed = inout;
   auto counting = thrust::make_counting_iterator<idx_t>(0);
 
-  thrust::for_each(thrust::cuda::par.on(stream), counting,
-                   counting + (size / 2), [=] __device__(idx_t idx) {
+  thrust::for_each(rmm::exec_policy(stream), counting, counting + (size / 2),
+                   [=] __device__(idx_t idx) {
                      idx_t dest_row = idx % m;
                      idx_t dest_col = idx / m;
                      idx_t src_row = dest_row;
@@ -161,8 +162,8 @@ void rowReverse(m_t *inout, idx_t n_rows, idx_t n_cols, cudaStream_t stream) {
   auto d_q_reversed = inout;
   auto counting = thrust::make_counting_iterator<idx_t>(0);
 
-  thrust::for_each(thrust::cuda::par.on(stream), counting,
-                   counting + (size / 2), [=] __device__(idx_t idx) {
+  thrust::for_each(rmm::exec_policy(stream), counting, counting + (size / 2),
+                   [=] __device__(idx_t idx) {
                      idx_t dest_row = idx % m;
                      idx_t dest_col = idx / m;
                      idx_t src_row = (m - dest_row) - 1;

diff --git a/cpp/include/raft/sparse/convert/csr.cuh b/cpp/include/raft/sparse/convert/csr.cuh
@@ -160,7 +160,7 @@ void sorted_coo_to_csr(const T *rows, int nnz, T *row_ind, int m,
   thrust::device_ptr<T> row_counts_d =
     thrust::device_pointer_cast(row_counts.data());
   thrust::device_ptr<T> c_ind_d = thrust::device_pointer_cast(row_ind);
-  exclusive_scan(thrust::cuda::par.on(stream), row_counts_d, row_counts_d + m,
+  exclusive_scan(rmm::exec_policy(stream), row_counts_d, row_counts_d + m,
                  c_ind_d);
 }
 

diff --git a/cpp/include/raft/sparse/linalg/add.cuh b/cpp/include/raft/sparse/linalg/add.cuh
@@ -22,6 +22,7 @@
 #include <raft/sparse/cusparse_wrappers.h>
 #include <raft/cuda_utils.cuh>
 #include <rmm/device_uvector.hpp>
+#include <rmm/exec_policy.hpp>
 
 #include <thrust/device_ptr.h>
 #include <thrust/scan.h>
@@ -181,7 +182,7 @@ size_t csr_add_calc_inds(const int *a_ind, const int *a_indptr, const T *a_val,
   thrust::device_ptr<int> row_counts_d =
     thrust::device_pointer_cast(row_counts.data());
   thrust::device_ptr<int> c_ind_d = thrust::device_pointer_cast(out_ind);
-  exclusive_scan(thrust::cuda::par.on(stream), row_counts_d, row_counts_d + m,
+  exclusive_scan(rmm::exec_policy(stream), row_counts_d, row_counts_d + m,
                  c_ind_d);
 
   return cnnz;

diff --git a/cpp/include/raft/sparse/linalg/symmetrize.cuh b/cpp/include/raft/sparse/linalg/symmetrize.cuh
@@ -22,6 +22,7 @@
 #include <raft/sparse/cusparse_wrappers.h>
 #include <raft/cuda_utils.cuh>
 #include <rmm/device_uvector.hpp>
+#include <rmm/exec_policy.hpp>
 
 #include <raft/sparse/op/sort.h>
 #include <thrust/device_ptr.h>
@@ -293,8 +294,8 @@ void from_knn_symmetrize_matrix(const value_idx *restrict knn_indices,
     thrust::device_pointer_cast(row_sizes.data());
 
   // Rolling cumulative sum
-  thrust::exclusive_scan(thrust::cuda::par.on(stream), __row_sizes,
-                         __row_sizes + n, __edges);
+  thrust::exclusive_scan(rmm::exec_policy(stream), __row_sizes, __row_sizes + n,
+                         __edges);
 
   // (5) Perform final data + data.T operation in tandem with memcpying
   symmetric_sum<<<numBlocks, threadsPerBlock, 0, stream>>>(

diff --git a/cpp/include/raft/sparse/op/sort.h b/cpp/include/raft/sparse/op/sort.h
@@ -21,7 +21,7 @@
 #include <raft/cudart_utils.h>
 #include <raft/sparse/cusparse_wrappers.h>
 #include <raft/cuda_utils.cuh>
-#include <raft/mr/device/buffer.hpp>
+#include <rmm/exec_policy.hpp>
 
 #include <thrust/device_ptr.h>
 #include <thrust/scan.h>
@@ -69,8 +69,8 @@ void coo_sort(int m, int n, int nnz, int *rows, int *cols, T *vals,
   auto coo_indices = thrust::make_zip_iterator(thrust::make_tuple(rows, cols));
 
   // get all the colors in contiguous locations so we can map them to warps.
-  thrust::sort_by_key(thrust::cuda::par.on(stream), coo_indices,
-                      coo_indices + nnz, vals, TupleComp());
+  thrust::sort_by_key(rmm::exec_policy(stream), coo_indices, coo_indices + nnz,
+                      vals, TupleComp());
 }
 
 /**
@@ -104,8 +104,7 @@ void coo_sort_by_weight(value_idx *rows, value_idx *cols, value_t *data,
 
   auto first = thrust::make_zip_iterator(thrust::make_tuple(rows, cols));
 
-  thrust::sort_by_key(thrust::cuda::par.on(stream), t_data, t_data + nnz,
-                      first);
+  thrust::sort_by_key(rmm::exec_policy(stream), t_data, t_data + nnz, first);
 }
 };  // namespace op
 };  // end NAMESPACE sparse

diff --git a/cpp/include/raft/sparse/selection/connect_components.cuh b/cpp/include/raft/sparse/selection/connect_components.cuh
@@ -30,6 +30,7 @@
 #include <thrust/device_ptr.h>
 #include <thrust/sort.h>
 #include <rmm/device_uvector.hpp>
+#include <rmm/exec_policy.hpp>
 
 #include <cub/cub.cuh>
 
@@ -211,7 +212,7 @@ void perform_1nn(cub::KeyValuePair<value_idx, value_t> *kvp,
     workspace.data(), reduction_op, reduction_op, true, true, stream);
 
   LookupColorOp<value_idx, value_t> extract_colors_op(colors);
-  thrust::transform(thrust::cuda::par.on(stream), kvp, kvp + n_rows, nn_colors,
+  thrust::transform(rmm::exec_policy(stream), kvp, kvp + n_rows, nn_colors,
                     extract_colors_op);
 }
 
@@ -232,15 +233,15 @@ void sort_by_color(value_idx *colors, value_idx *nn_colors,
                    cub::KeyValuePair<value_idx, value_t> *kvp,
                    value_idx *src_indices, size_t n_rows, cudaStream_t stream) {
   thrust::counting_iterator<value_idx> arg_sort_iter(0);
-  thrust::copy(thrust::cuda::par.on(stream), arg_sort_iter,
-               arg_sort_iter + n_rows, src_indices);
+  thrust::copy(rmm::exec_policy(stream), arg_sort_iter, arg_sort_iter + n_rows,
+               src_indices);
 
   auto keys = thrust::make_zip_iterator(thrust::make_tuple(
     colors, nn_colors, (raft::linkage::KeyValuePair<value_idx, value_t> *)kvp));
   auto vals = thrust::make_zip_iterator(thrust::make_tuple(src_indices));
 
   // get all the colors in contiguous locations so we can map them to warps.
-  thrust::sort_by_key(thrust::cuda::par.on(stream), keys, keys + n_rows, vals,
+  thrust::sort_by_key(rmm::exec_policy(stream), keys, keys + n_rows, vals,
                       TupleComp());
 }
 

diff --git a/cpp/test/eigen_solvers.cu b/cpp/test/eigen_solvers.cu
@@ -38,7 +38,6 @@ TEST(Raft, EigenSolvers) {
   index_type nnz = 0;
   index_type nrows = 0;
   auto stream = h.get_stream();
-  auto t_exe_pol = thrust::cuda::par.on(stream);
 
   sparse_matrix_t<index_type, value_type> sm1{h, ro, ci, vs, nrows, nnz};
   ASSERT_EQ(nullptr, sm1.row_offsets_);

diff --git a/cpp/test/matrix/matrix.cu b/cpp/test/matrix/matrix.cu
@@ -112,7 +112,7 @@ class MatrixCopyRowsTest : public ::testing::Test {
     // Init input array
     thrust::counting_iterator<idx_t> first(0);
     thrust::device_ptr<math_t> ptr(input.data());
-    thrust::copy(thrust::cuda::par.on(stream), first, first + n_cols * n_rows,
+    thrust::copy(handle.get_thrust_policy(), first, first + n_cols * n_rows,
                  ptr);
   }