From a3c9392721370c79e85d7d3bb546e16fe3e280e4 Mon Sep 17 00:00:00 2001 From: viclafargue Date: Thu, 1 Jul 2021 12:05:58 +0200 Subject: [PATCH] Use rmm::exec_policy instead of thrust::cuda::par.on --- cpp/include/raft/linalg/init.h | 3 ++- cpp/include/raft/linalg/transpose.h | 3 ++- cpp/include/raft/matrix/matrix.cuh | 13 +++++++------ cpp/include/raft/sparse/convert/csr.cuh | 2 +- cpp/include/raft/sparse/linalg/add.cuh | 3 ++- cpp/include/raft/sparse/linalg/symmetrize.cuh | 5 +++-- cpp/include/raft/sparse/op/sort.h | 9 ++++----- .../raft/sparse/selection/connect_components.cuh | 9 +++++---- cpp/test/eigen_solvers.cu | 1 - cpp/test/matrix/matrix.cu | 2 +- 10 files changed, 27 insertions(+), 23 deletions(-) diff --git a/cpp/include/raft/linalg/init.h b/cpp/include/raft/linalg/init.h index cb2e8ed1ab..9944685a1f 100644 --- a/cpp/include/raft/linalg/init.h +++ b/cpp/include/raft/linalg/init.h @@ -19,6 +19,7 @@ #include #include #include +#include namespace raft { namespace linalg { @@ -40,7 +41,7 @@ void range(T *out, int start, int end, cudaStream_t stream) { thrust::counting_iterator first(start); thrust::counting_iterator last = first + (end - start); thrust::device_ptr ptr(out); - thrust::copy(thrust::cuda::par.on(stream), first, last, ptr); + thrust::copy(rmm::exec_policy(stream), first, last, ptr); } /** diff --git a/cpp/include/raft/linalg/transpose.h b/cpp/include/raft/linalg/transpose.h index d90f6271fa..fef0007eb4 100644 --- a/cpp/include/raft/linalg/transpose.h +++ b/cpp/include/raft/linalg/transpose.h @@ -19,6 +19,7 @@ #include #include #include +#include namespace raft { namespace linalg { @@ -60,7 +61,7 @@ void transpose(math_t *inout, int n, cudaStream_t stream) { auto d_inout = inout; auto counting = thrust::make_counting_iterator(0); - thrust::for_each(thrust::cuda::par.on(stream), counting, counting + size, + thrust::for_each(rmm::exec_policy(stream), counting, counting + size, [=] __device__(int idx) { int s_row = idx % m; int s_col = idx / m; diff --git a/cpp/include/raft/matrix/matrix.cuh b/cpp/include/raft/matrix/matrix.cuh index 5f5755e24e..8231baaad9 100644 --- a/cpp/include/raft/matrix/matrix.cuh +++ b/cpp/include/raft/matrix/matrix.cuh @@ -27,6 +27,7 @@ #include #include #include +#include namespace raft { namespace matrix { @@ -64,7 +65,7 @@ void copyRows(const m_t *in, idx_t n_rows, idx_t n_cols, m_t *out, idx_t size = n_rows_indices * n_cols; auto counting = thrust::make_counting_iterator(0); - thrust::for_each(thrust::cuda::par.on(stream), counting, counting + size, + thrust::for_each(rmm::exec_policy(stream), counting, counting + size, [=] __device__(idx_t idx) { idx_t row = idx % n_rows_indices; idx_t col = idx / n_rows_indices; @@ -108,7 +109,7 @@ void truncZeroOrigin(m_t *in, idx_t in_n_rows, m_t *out, idx_t out_n_rows, auto d_q_trunc = out; auto counting = thrust::make_counting_iterator(0); - thrust::for_each(thrust::cuda::par.on(stream), counting, counting + size, + thrust::for_each(rmm::exec_policy(stream), counting, counting + size, [=] __device__(idx_t idx) { idx_t row = idx % m; idx_t col = idx / m; @@ -133,8 +134,8 @@ void colReverse(m_t *inout, idx_t n_rows, idx_t n_cols, cudaStream_t stream) { auto d_q_reversed = inout; auto counting = thrust::make_counting_iterator(0); - thrust::for_each(thrust::cuda::par.on(stream), counting, - counting + (size / 2), [=] __device__(idx_t idx) { + thrust::for_each(rmm::exec_policy(stream), counting, counting + (size / 2), + [=] __device__(idx_t idx) { idx_t dest_row = idx % m; idx_t dest_col = idx / m; idx_t src_row = dest_row; @@ -161,8 +162,8 @@ void rowReverse(m_t *inout, idx_t n_rows, idx_t n_cols, cudaStream_t stream) { auto d_q_reversed = inout; auto counting = thrust::make_counting_iterator(0); - thrust::for_each(thrust::cuda::par.on(stream), counting, - counting + (size / 2), [=] __device__(idx_t idx) { + thrust::for_each(rmm::exec_policy(stream), counting, counting + (size / 2), + [=] __device__(idx_t idx) { idx_t dest_row = idx % m; idx_t dest_col = idx / m; idx_t src_row = (m - dest_row) - 1; diff --git a/cpp/include/raft/sparse/convert/csr.cuh b/cpp/include/raft/sparse/convert/csr.cuh index 16f351bf48..79b18ebd0a 100644 --- a/cpp/include/raft/sparse/convert/csr.cuh +++ b/cpp/include/raft/sparse/convert/csr.cuh @@ -160,7 +160,7 @@ void sorted_coo_to_csr(const T *rows, int nnz, T *row_ind, int m, thrust::device_ptr row_counts_d = thrust::device_pointer_cast(row_counts.data()); thrust::device_ptr c_ind_d = thrust::device_pointer_cast(row_ind); - exclusive_scan(thrust::cuda::par.on(stream), row_counts_d, row_counts_d + m, + exclusive_scan(rmm::exec_policy(stream), row_counts_d, row_counts_d + m, c_ind_d); } diff --git a/cpp/include/raft/sparse/linalg/add.cuh b/cpp/include/raft/sparse/linalg/add.cuh index 3bf028d14a..7ed627b9e2 100644 --- a/cpp/include/raft/sparse/linalg/add.cuh +++ b/cpp/include/raft/sparse/linalg/add.cuh @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -181,7 +182,7 @@ size_t csr_add_calc_inds(const int *a_ind, const int *a_indptr, const T *a_val, thrust::device_ptr row_counts_d = thrust::device_pointer_cast(row_counts.data()); thrust::device_ptr c_ind_d = thrust::device_pointer_cast(out_ind); - exclusive_scan(thrust::cuda::par.on(stream), row_counts_d, row_counts_d + m, + exclusive_scan(rmm::exec_policy(stream), row_counts_d, row_counts_d + m, c_ind_d); return cnnz; diff --git a/cpp/include/raft/sparse/linalg/symmetrize.cuh b/cpp/include/raft/sparse/linalg/symmetrize.cuh index 614c9d830e..a6e1027288 100644 --- a/cpp/include/raft/sparse/linalg/symmetrize.cuh +++ b/cpp/include/raft/sparse/linalg/symmetrize.cuh @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -293,8 +294,8 @@ void from_knn_symmetrize_matrix(const value_idx *restrict knn_indices, thrust::device_pointer_cast(row_sizes.data()); // Rolling cumulative sum - thrust::exclusive_scan(thrust::cuda::par.on(stream), __row_sizes, - __row_sizes + n, __edges); + thrust::exclusive_scan(rmm::exec_policy(stream), __row_sizes, __row_sizes + n, + __edges); // (5) Perform final data + data.T operation in tandem with memcpying symmetric_sum<<>>( diff --git a/cpp/include/raft/sparse/op/sort.h b/cpp/include/raft/sparse/op/sort.h index d53ceb62a9..9414a11ade 100644 --- a/cpp/include/raft/sparse/op/sort.h +++ b/cpp/include/raft/sparse/op/sort.h @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include @@ -69,8 +69,8 @@ void coo_sort(int m, int n, int nnz, int *rows, int *cols, T *vals, auto coo_indices = thrust::make_zip_iterator(thrust::make_tuple(rows, cols)); // get all the colors in contiguous locations so we can map them to warps. - thrust::sort_by_key(thrust::cuda::par.on(stream), coo_indices, - coo_indices + nnz, vals, TupleComp()); + thrust::sort_by_key(rmm::exec_policy(stream), coo_indices, coo_indices + nnz, + vals, TupleComp()); } /** @@ -104,8 +104,7 @@ void coo_sort_by_weight(value_idx *rows, value_idx *cols, value_t *data, auto first = thrust::make_zip_iterator(thrust::make_tuple(rows, cols)); - thrust::sort_by_key(thrust::cuda::par.on(stream), t_data, t_data + nnz, - first); + thrust::sort_by_key(rmm::exec_policy(stream), t_data, t_data + nnz, first); } }; // namespace op }; // end NAMESPACE sparse diff --git a/cpp/include/raft/sparse/selection/connect_components.cuh b/cpp/include/raft/sparse/selection/connect_components.cuh index 390522c9bc..46369ca964 100644 --- a/cpp/include/raft/sparse/selection/connect_components.cuh +++ b/cpp/include/raft/sparse/selection/connect_components.cuh @@ -30,6 +30,7 @@ #include #include #include +#include #include @@ -211,7 +212,7 @@ void perform_1nn(cub::KeyValuePair *kvp, workspace.data(), reduction_op, reduction_op, true, true, stream); LookupColorOp extract_colors_op(colors); - thrust::transform(thrust::cuda::par.on(stream), kvp, kvp + n_rows, nn_colors, + thrust::transform(rmm::exec_policy(stream), kvp, kvp + n_rows, nn_colors, extract_colors_op); } @@ -232,15 +233,15 @@ void sort_by_color(value_idx *colors, value_idx *nn_colors, cub::KeyValuePair *kvp, value_idx *src_indices, size_t n_rows, cudaStream_t stream) { thrust::counting_iterator arg_sort_iter(0); - thrust::copy(thrust::cuda::par.on(stream), arg_sort_iter, - arg_sort_iter + n_rows, src_indices); + thrust::copy(rmm::exec_policy(stream), arg_sort_iter, arg_sort_iter + n_rows, + src_indices); auto keys = thrust::make_zip_iterator(thrust::make_tuple( colors, nn_colors, (raft::linkage::KeyValuePair *)kvp)); auto vals = thrust::make_zip_iterator(thrust::make_tuple(src_indices)); // get all the colors in contiguous locations so we can map them to warps. - thrust::sort_by_key(thrust::cuda::par.on(stream), keys, keys + n_rows, vals, + thrust::sort_by_key(rmm::exec_policy(stream), keys, keys + n_rows, vals, TupleComp()); } diff --git a/cpp/test/eigen_solvers.cu b/cpp/test/eigen_solvers.cu index 8025d8dcd6..ef67d95348 100644 --- a/cpp/test/eigen_solvers.cu +++ b/cpp/test/eigen_solvers.cu @@ -38,7 +38,6 @@ TEST(Raft, EigenSolvers) { index_type nnz = 0; index_type nrows = 0; auto stream = h.get_stream(); - auto t_exe_pol = thrust::cuda::par.on(stream); sparse_matrix_t sm1{h, ro, ci, vs, nrows, nnz}; ASSERT_EQ(nullptr, sm1.row_offsets_); diff --git a/cpp/test/matrix/matrix.cu b/cpp/test/matrix/matrix.cu index e7da92a136..2d2d9d2057 100644 --- a/cpp/test/matrix/matrix.cu +++ b/cpp/test/matrix/matrix.cu @@ -112,7 +112,7 @@ class MatrixCopyRowsTest : public ::testing::Test { // Init input array thrust::counting_iterator first(0); thrust::device_ptr ptr(input.data()); - thrust::copy(thrust::cuda::par.on(stream), first, first + n_cols * n_rows, + thrust::copy(handle.get_thrust_policy(), first, first + n_cols * n_rows, ptr); }