From f6fe37a9718adc91a472f47aa357cc438930c69b Mon Sep 17 00:00:00 2001 From: viclafargue Date: Mon, 5 Jul 2021 12:04:35 +0200 Subject: [PATCH] Use rmm::exec_policy instead of thrust::cuda::par.on + remove rmm::device_vector --- cpp/include/raft/linalg/init.h | 3 +- cpp/include/raft/linalg/transpose.h | 4 +- cpp/include/raft/matrix/matrix.cuh | 14 +-- cpp/include/raft/sparse/convert/csr.cuh | 2 +- cpp/include/raft/sparse/coo.cuh | 1 - .../coo_spmv_strategies/base_strategy.cuh | 1 - cpp/include/raft/sparse/distance/utils.cuh | 2 - cpp/include/raft/sparse/linalg/add.cuh | 3 +- cpp/include/raft/sparse/linalg/symmetrize.cuh | 5 +- .../raft/sparse/mst/detail/mst_solver_inl.cuh | 110 +++++++++--------- cpp/include/raft/sparse/mst/detail/utils.cuh | 4 +- cpp/include/raft/sparse/mst/mst_solver.cuh | 24 ++-- cpp/include/raft/sparse/op/sort.h | 9 +- .../sparse/selection/connect_components.cuh | 9 +- .../knn/detail/ann_quantized_faiss.cuh | 1 - .../knn/detail/knn_brute_force_faiss.cuh | 1 - cpp/include/raft/spectral/kmeans.hpp | 1 - .../raft/spectral/modularity_maximization.hpp | 1 - cpp/include/raft/spectral/partition.hpp | 1 - cpp/include/raft/spectral/spectral_util.hpp | 1 - cpp/test/eigen_solvers.cu | 1 - cpp/test/linalg/reduce.cuh | 15 ++- cpp/test/matrix/matrix.cu | 2 +- cpp/test/mst.cu | 20 ++-- 24 files changed, 118 insertions(+), 117 deletions(-) diff --git a/cpp/include/raft/linalg/init.h b/cpp/include/raft/linalg/init.h index cb2e8ed1ab..9944685a1f 100644 --- a/cpp/include/raft/linalg/init.h +++ b/cpp/include/raft/linalg/init.h @@ -19,6 +19,7 @@ #include #include #include +#include namespace raft { namespace linalg { @@ -40,7 +41,7 @@ void range(T *out, int start, int end, cudaStream_t stream) { thrust::counting_iterator first(start); thrust::counting_iterator last = first + (end - start); thrust::device_ptr ptr(out); - thrust::copy(thrust::cuda::par.on(stream), first, last, ptr); + thrust::copy(rmm::exec_policy(stream), first, last, ptr); } /** diff --git a/cpp/include/raft/linalg/transpose.h b/cpp/include/raft/linalg/transpose.h index d90f6271fa..db1cabd694 100644 --- a/cpp/include/raft/linalg/transpose.h +++ b/cpp/include/raft/linalg/transpose.h @@ -17,8 +17,8 @@ #pragma once #include -#include #include +#include namespace raft { namespace linalg { @@ -60,7 +60,7 @@ void transpose(math_t *inout, int n, cudaStream_t stream) { auto d_inout = inout; auto counting = thrust::make_counting_iterator(0); - thrust::for_each(thrust::cuda::par.on(stream), counting, counting + size, + thrust::for_each(rmm::exec_policy(stream), counting, counting + size, [=] __device__(int idx) { int s_row = idx % m; int s_col = idx / m; diff --git a/cpp/include/raft/matrix/matrix.cuh b/cpp/include/raft/matrix/matrix.cuh index 5f5755e24e..688b92da09 100644 --- a/cpp/include/raft/matrix/matrix.cuh +++ b/cpp/include/raft/matrix/matrix.cuh @@ -20,13 +20,13 @@ #include #include #include -#include #include #include #include #include #include #include +#include namespace raft { namespace matrix { @@ -64,7 +64,7 @@ void copyRows(const m_t *in, idx_t n_rows, idx_t n_cols, m_t *out, idx_t size = n_rows_indices * n_cols; auto counting = thrust::make_counting_iterator(0); - thrust::for_each(thrust::cuda::par.on(stream), counting, counting + size, + thrust::for_each(rmm::exec_policy(stream), counting, counting + size, [=] __device__(idx_t idx) { idx_t row = idx % n_rows_indices; idx_t col = idx / n_rows_indices; @@ -108,7 +108,7 @@ void truncZeroOrigin(m_t *in, idx_t in_n_rows, m_t *out, idx_t out_n_rows, auto d_q_trunc = out; auto counting = thrust::make_counting_iterator(0); - thrust::for_each(thrust::cuda::par.on(stream), counting, counting + size, + thrust::for_each(rmm::exec_policy(stream), counting, counting + size, [=] __device__(idx_t idx) { idx_t row = idx % m; idx_t col = idx / m; @@ -133,8 +133,8 @@ void colReverse(m_t *inout, idx_t n_rows, idx_t n_cols, cudaStream_t stream) { auto d_q_reversed = inout; auto counting = thrust::make_counting_iterator(0); - thrust::for_each(thrust::cuda::par.on(stream), counting, - counting + (size / 2), [=] __device__(idx_t idx) { + thrust::for_each(rmm::exec_policy(stream), counting, counting + (size / 2), + [=] __device__(idx_t idx) { idx_t dest_row = idx % m; idx_t dest_col = idx / m; idx_t src_row = dest_row; @@ -161,8 +161,8 @@ void rowReverse(m_t *inout, idx_t n_rows, idx_t n_cols, cudaStream_t stream) { auto d_q_reversed = inout; auto counting = thrust::make_counting_iterator(0); - thrust::for_each(thrust::cuda::par.on(stream), counting, - counting + (size / 2), [=] __device__(idx_t idx) { + thrust::for_each(rmm::exec_policy(stream), counting, counting + (size / 2), + [=] __device__(idx_t idx) { idx_t dest_row = idx % m; idx_t dest_col = idx / m; idx_t src_row = (m - dest_row) - 1; diff --git a/cpp/include/raft/sparse/convert/csr.cuh b/cpp/include/raft/sparse/convert/csr.cuh index 16f351bf48..79b18ebd0a 100644 --- a/cpp/include/raft/sparse/convert/csr.cuh +++ b/cpp/include/raft/sparse/convert/csr.cuh @@ -160,7 +160,7 @@ void sorted_coo_to_csr(const T *rows, int nnz, T *row_ind, int m, thrust::device_ptr row_counts_d = thrust::device_pointer_cast(row_counts.data()); thrust::device_ptr c_ind_d = thrust::device_pointer_cast(row_ind); - exclusive_scan(thrust::cuda::par.on(stream), row_counts_d, row_counts_d + m, + exclusive_scan(rmm::exec_policy(stream), row_counts_d, row_counts_d + m, c_ind_d); } diff --git a/cpp/include/raft/sparse/coo.cuh b/cpp/include/raft/sparse/coo.cuh index 6af8eae395..fa21614f8f 100644 --- a/cpp/include/raft/sparse/coo.cuh +++ b/cpp/include/raft/sparse/coo.cuh @@ -22,7 +22,6 @@ #include #include -#include #include #include diff --git a/cpp/include/raft/sparse/distance/coo_spmv_strategies/base_strategy.cuh b/cpp/include/raft/sparse/distance/coo_spmv_strategies/base_strategy.cuh index 194799aed0..3b57225350 100644 --- a/cpp/include/raft/sparse/distance/coo_spmv_strategies/base_strategy.cuh +++ b/cpp/include/raft/sparse/distance/coo_spmv_strategies/base_strategy.cuh @@ -22,7 +22,6 @@ #include "coo_mask_row_iterators.cuh" #include -#include namespace raft { namespace sparse { diff --git a/cpp/include/raft/sparse/distance/utils.cuh b/cpp/include/raft/sparse/distance/utils.cuh index 6b6d77a2d5..3bee1bc87d 100644 --- a/cpp/include/raft/sparse/distance/utils.cuh +++ b/cpp/include/raft/sparse/distance/utils.cuh @@ -21,8 +21,6 @@ #include -#include - namespace raft { namespace sparse { namespace distance { diff --git a/cpp/include/raft/sparse/linalg/add.cuh b/cpp/include/raft/sparse/linalg/add.cuh index 3bf028d14a..7ed627b9e2 100644 --- a/cpp/include/raft/sparse/linalg/add.cuh +++ b/cpp/include/raft/sparse/linalg/add.cuh @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -181,7 +182,7 @@ size_t csr_add_calc_inds(const int *a_ind, const int *a_indptr, const T *a_val, thrust::device_ptr row_counts_d = thrust::device_pointer_cast(row_counts.data()); thrust::device_ptr c_ind_d = thrust::device_pointer_cast(out_ind); - exclusive_scan(thrust::cuda::par.on(stream), row_counts_d, row_counts_d + m, + exclusive_scan(rmm::exec_policy(stream), row_counts_d, row_counts_d + m, c_ind_d); return cnnz; diff --git a/cpp/include/raft/sparse/linalg/symmetrize.cuh b/cpp/include/raft/sparse/linalg/symmetrize.cuh index 614c9d830e..a6e1027288 100644 --- a/cpp/include/raft/sparse/linalg/symmetrize.cuh +++ b/cpp/include/raft/sparse/linalg/symmetrize.cuh @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -293,8 +294,8 @@ void from_knn_symmetrize_matrix(const value_idx *restrict knn_indices, thrust::device_pointer_cast(row_sizes.data()); // Rolling cumulative sum - thrust::exclusive_scan(thrust::cuda::par.on(stream), __row_sizes, - __row_sizes + n, __edges); + thrust::exclusive_scan(rmm::exec_policy(stream), __row_sizes, __row_sizes + n, + __edges); // (5) Perform final data + data.T operation in tandem with memcpying symmetric_sum<<>>( diff --git a/cpp/include/raft/sparse/mst/detail/mst_solver_inl.cuh b/cpp/include/raft/sparse/mst/detail/mst_solver_inl.cuh index 029b76a945..33b980afcd 100644 --- a/cpp/include/raft/sparse/mst/detail/mst_solver_inl.cuh +++ b/cpp/include/raft/sparse/mst/detail/mst_solver_inl.cuh @@ -23,10 +23,10 @@ #include "utils.cuh" #include -#include +#include +#include #include -#include #include #include #include @@ -34,10 +34,6 @@ #include #include -#include - -#include - namespace raft { namespace mst { typedef std::chrono::high_resolution_clock Clock; @@ -63,20 +59,20 @@ MST_solver::MST_solver( offsets(offsets_), indices(indices_), weights(weights_), - altered_weights(e_), + altered_weights(e_, stream_), v(v_), e(e_), color_index(color_), - color(v_), - next_color(v_), - min_edge_color(v_), - new_mst_edge(v_), - mst_edge(e_, false), - temp_src(2 * v_), - temp_dst(2 * v_), - temp_weights(2 * v_), - mst_edge_count(1, 0), - prev_mst_edge_count(1, 0), + color(v_, stream_), + next_color(v_, stream_), + min_edge_color(v_, stream_), + new_mst_edge(v_, stream_), + mst_edge(e_, stream_), + temp_src(2 * v_, stream_), + temp_dst(2 * v_, stream_), + temp_weights(2 * v_, stream_), + mst_edge_count(1, stream_), + prev_mst_edge_count(1, stream_), stream(stream_), symmetrize_output(symmetrize_output_), initialize_colors(initialize_colors_), @@ -85,13 +81,18 @@ MST_solver::MST_solver( max_threads = handle_.get_device_properties().maxThreadsPerBlock; sm_count = handle_.get_device_properties().multiProcessorCount; + mst_edge_count.set_value_to_zero_async(stream); + prev_mst_edge_count.set_value_to_zero_async(stream); + CUDA_CHECK(cudaMemsetAsync(mst_edge.data(), 0, mst_edge.size() * sizeof(bool), + stream)); + //Initially, color holds the vertex id as color auto policy = handle.get_thrust_policy(); if (initialize_colors_) { thrust::sequence(policy, color.begin(), color.end(), 0); thrust::sequence(policy, color_index, color_index + v, 0); } else { - raft::copy(color.data().get(), color_index, v, stream); + raft::copy(color.data(), color_index, v, stream); } thrust::sequence(policy, next_color.begin(), next_color.end(), 0); } @@ -158,12 +159,12 @@ MST_solver::solve() { timer3 += duration_us(stop - start); #endif - auto curr_mst_edge_count = mst_edge_count[0]; + auto curr_mst_edge_count = mst_edge_count.value(stream); RAFT_EXPECTS(curr_mst_edge_count <= max_mst_edges, "Number of edges found by MST is invalid. This may be due to " "loss in precision. Try increasing precision of weights."); - if (curr_mst_edge_count == prev_mst_edge_count[0]) { + if (curr_mst_edge_count == prev_mst_edge_count.value(stream)) { #ifdef MST_TIME std::cout << "Iterations: " << i << std::endl; std::cout << timer0 << "," << timer1 << "," << timer2 << "," << timer3 @@ -194,12 +195,11 @@ MST_solver::solve() { #endif // copy this iteration's results and store - prev_mst_edge_count = mst_edge_count; + prev_mst_edge_count.set_value_async(curr_mst_edge_count, stream); } // result packaging - thrust::host_vector host_mst_edge_count = mst_edge_count; - mst_result.n_edges = host_mst_edge_count[0]; + mst_result.n_edges = mst_edge_count.value(stream); mst_result.src.resize(mst_result.n_edges, stream); mst_result.dst.resize(mst_result.n_edges, stream); mst_result.weights.resize(mst_result.n_edges, stream); @@ -226,7 +226,7 @@ template ::alteration_max() { auto policy = handle.get_thrust_policy(); - rmm::device_vector tmp(e); + rmm::device_uvector tmp(e, stream); thrust::device_ptr weights_ptr(weights); thrust::copy(policy, weights_ptr, weights_ptr + e, tmp.begin()); //sort tmp weights @@ -240,7 +240,7 @@ MST_solver::alteration_max() { thrust::make_zip_iterator(thrust::make_tuple(tmp.begin(), tmp.begin() + 1)); auto end = thrust::make_zip_iterator(thrust::make_tuple(new_end - 1, new_end)); - auto init = tmp[1] - tmp[0]; + auto init = tmp.element(1, stream) - tmp.element(0, stream); auto max = thrust::transform_reduce(policy, begin, end, alteration_functor(), init, thrust::minimum()); @@ -259,7 +259,7 @@ void MST_solver::alteration() { alteration_t max = alteration_max(); // pool of rand values - rmm::device_vector rand_values(v); + rmm::device_uvector rand_values(v, stream); // Random number generator curandGenerator_t randGen; @@ -267,8 +267,7 @@ void MST_solver::alteration() { curandSetPseudoRandomGeneratorSeed(randGen, 1234567); // Initialize rand values - auto curand_status = - curand_generate_uniformX(randGen, rand_values.data().get(), v); + auto curand_status = curand_generate_uniformX(randGen, rand_values.data(), v); RAFT_EXPECTS(curand_status == CURAND_STATUS_SUCCESS, "MST: CURAND failed"); curand_status = curandDestroyGenerator(randGen); RAFT_EXPECTS(curand_status == CURAND_STATUS_SUCCESS, @@ -276,8 +275,8 @@ void MST_solver::alteration() { //Alterate the weights, make all undirected edge weight unique while keeping Wuv == Wvu detail::alteration_kernel<<>>( - v, e, offsets, indices, weights, max, rand_values.data().get(), - altered_weights.data().get()); + v, e, offsets, indices, weights, max, rand_values.data(), + altered_weights.data()); } // updates colors of vertices by propagating the lower color to the higher @@ -286,23 +285,24 @@ template ::label_prop( vertex_t* mst_src, vertex_t* mst_dst) { // update the colors of both ends its until there is no change in colors - thrust::host_vector curr_mst_edge_count = mst_edge_count; + edge_t curr_mst_edge_count = mst_edge_count.value(stream); auto min_pair_nthreads = std::min(v, (vertex_t)max_threads); auto min_pair_nblocks = std::min( (v + min_pair_nthreads - 1) / min_pair_nthreads, (vertex_t)max_blocks); - rmm::device_vector done(1, false); - - edge_t* new_mst_edge_ptr = new_mst_edge.data().get(); - vertex_t* color_ptr = color.data().get(); - vertex_t* next_color_ptr = next_color.data().get(); + edge_t* new_mst_edge_ptr = new_mst_edge.data(); + vertex_t* color_ptr = color.data(); + vertex_t* next_color_ptr = next_color.data(); - bool* done_ptr = done.data().get(); + rmm::device_scalar done(stream); + done.set_value_to_zero_async(stream); + bool* done_ptr = done.data(); + const bool true_val = true; auto i = 0; - while (!done[0]) { - done[0] = true; + while (!done.value(stream)) { + done.set_value_async(true_val, stream); detail::min_pair_colors<<>>( v, indices, new_mst_edge_ptr, color_ptr, color_index, next_color_ptr); @@ -333,11 +333,11 @@ void MST_solver>>( offsets, indices, altered_weights_ptr, color_ptr, color_index, @@ -356,14 +356,14 @@ void MST_solver::max()); - vertex_t* color_ptr = color.data().get(); - edge_t* new_mst_edge_ptr = new_mst_edge.data().get(); - bool* mst_edge_ptr = mst_edge.data().get(); - alteration_t* min_edge_color_ptr = min_edge_color.data().get(); - alteration_t* altered_weights_ptr = altered_weights.data().get(); - vertex_t* temp_src_ptr = temp_src.data().get(); - vertex_t* temp_dst_ptr = temp_dst.data().get(); - weight_t* temp_weights_ptr = temp_weights.data().get(); + vertex_t* color_ptr = color.data(); + edge_t* new_mst_edge_ptr = new_mst_edge.data(); + bool* mst_edge_ptr = mst_edge.data(); + alteration_t* min_edge_color_ptr = min_edge_color.data(); + alteration_t* altered_weights_ptr = altered_weights.data(); + vertex_t* temp_src_ptr = temp_src.data(); + vertex_t* temp_dst_ptr = temp_dst.data(); + weight_t* temp_weights_ptr = temp_weights.data(); detail::min_edge_per_supervertex<<>>( color_ptr, color_index, new_mst_edge_ptr, mst_edge_ptr, indices, weights, @@ -388,8 +388,8 @@ void MST_solver::check_termination() { std::min((2 * v + nthreads - 1) / nthreads, (vertex_t)max_blocks); // count number of new mst edges - edge_t* mst_edge_count_ptr = mst_edge_count.data().get(); - vertex_t* temp_src_ptr = temp_src.data().get(); + edge_t* mst_edge_count_ptr = mst_edge_count.data(); + vertex_t* temp_src_ptr = temp_src.data(); detail::kernel_count_new_mst_edges<<>>( temp_src_ptr, mst_edge_count_ptr, 2 * v); @@ -411,7 +411,7 @@ void MST_solver::append_src_dst_pair( vertex_t* mst_src, vertex_t* mst_dst, weight_t* mst_weights) { auto policy = handle.get_thrust_policy(); - auto curr_mst_edge_count = prev_mst_edge_count[0]; + edge_t curr_mst_edge_count = prev_mst_edge_count.value(stream); // iterator to end of mst edges added to final output in previous iteration auto src_dst_zip_end = thrust::make_zip_iterator(thrust::make_tuple( diff --git a/cpp/include/raft/sparse/mst/detail/utils.cuh b/cpp/include/raft/sparse/mst/detail/utils.cuh index 8f755de459..4d5ca6ebe1 100644 --- a/cpp/include/raft/sparse/mst/detail/utils.cuh +++ b/cpp/include/raft/sparse/mst/detail/utils.cuh @@ -18,7 +18,7 @@ #pragma once #include -#include +#include #define MST_TIME namespace raft { @@ -32,7 +32,7 @@ __device__ idx_t get_1D_idx() { // somewhat smart vector print template -void printv(rmm::device_vector& vec, const std::string& name = "", +void printv(rmm::device_uvector& vec, const std::string& name = "", const size_t displ = 5) { #ifdef MST_TIME std::cout.precision(15); diff --git a/cpp/include/raft/sparse/mst/mst_solver.cuh b/cpp/include/raft/sparse/mst/mst_solver.cuh index 833882ea0d..44b34ee5c7 100644 --- a/cpp/include/raft/sparse/mst/mst_solver.cuh +++ b/cpp/include/raft/sparse/mst/mst_solver.cuh @@ -18,8 +18,8 @@ #pragma once #include +#include #include -#include namespace raft { @@ -68,24 +68,24 @@ class MST_solver { vertex_t sm_count; vertex_t* color_index; // represent each supervertex as a color - rmm::device_vector + rmm::device_uvector min_edge_color; // minimum incident edge weight per color - rmm::device_vector new_mst_edge; // new minimum edge per vertex - rmm::device_vector + rmm::device_uvector new_mst_edge; // new minimum edge per vertex + rmm::device_uvector altered_weights; // weights to be used for mst - rmm::device_vector + rmm::device_scalar mst_edge_count; // total number of edges added after every iteration - rmm::device_vector + rmm::device_scalar prev_mst_edge_count; // total number of edges up to the previous iteration - rmm::device_vector + rmm::device_uvector mst_edge; // mst output - true if the edge belongs in mst - rmm::device_vector next_color; // next iteration color - rmm::device_vector color; // index of color that vertex points to + rmm::device_uvector next_color; // next iteration color + rmm::device_uvector color; // index of color that vertex points to // new src-dst pairs found per iteration - rmm::device_vector temp_src; - rmm::device_vector temp_dst; - rmm::device_vector temp_weights; + rmm::device_uvector temp_src; + rmm::device_uvector temp_dst; + rmm::device_uvector temp_weights; void label_prop(vertex_t* mst_src, vertex_t* mst_dst); void min_edge_per_vertex(); diff --git a/cpp/include/raft/sparse/op/sort.h b/cpp/include/raft/sparse/op/sort.h index d53ceb62a9..9414a11ade 100644 --- a/cpp/include/raft/sparse/op/sort.h +++ b/cpp/include/raft/sparse/op/sort.h @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include @@ -69,8 +69,8 @@ void coo_sort(int m, int n, int nnz, int *rows, int *cols, T *vals, auto coo_indices = thrust::make_zip_iterator(thrust::make_tuple(rows, cols)); // get all the colors in contiguous locations so we can map them to warps. - thrust::sort_by_key(thrust::cuda::par.on(stream), coo_indices, - coo_indices + nnz, vals, TupleComp()); + thrust::sort_by_key(rmm::exec_policy(stream), coo_indices, coo_indices + nnz, + vals, TupleComp()); } /** @@ -104,8 +104,7 @@ void coo_sort_by_weight(value_idx *rows, value_idx *cols, value_t *data, auto first = thrust::make_zip_iterator(thrust::make_tuple(rows, cols)); - thrust::sort_by_key(thrust::cuda::par.on(stream), t_data, t_data + nnz, - first); + thrust::sort_by_key(rmm::exec_policy(stream), t_data, t_data + nnz, first); } }; // namespace op }; // end NAMESPACE sparse diff --git a/cpp/include/raft/sparse/selection/connect_components.cuh b/cpp/include/raft/sparse/selection/connect_components.cuh index 390522c9bc..46369ca964 100644 --- a/cpp/include/raft/sparse/selection/connect_components.cuh +++ b/cpp/include/raft/sparse/selection/connect_components.cuh @@ -30,6 +30,7 @@ #include #include #include +#include #include @@ -211,7 +212,7 @@ void perform_1nn(cub::KeyValuePair *kvp, workspace.data(), reduction_op, reduction_op, true, true, stream); LookupColorOp extract_colors_op(colors); - thrust::transform(thrust::cuda::par.on(stream), kvp, kvp + n_rows, nn_colors, + thrust::transform(rmm::exec_policy(stream), kvp, kvp + n_rows, nn_colors, extract_colors_op); } @@ -232,15 +233,15 @@ void sort_by_color(value_idx *colors, value_idx *nn_colors, cub::KeyValuePair *kvp, value_idx *src_indices, size_t n_rows, cudaStream_t stream) { thrust::counting_iterator arg_sort_iter(0); - thrust::copy(thrust::cuda::par.on(stream), arg_sort_iter, - arg_sort_iter + n_rows, src_indices); + thrust::copy(rmm::exec_policy(stream), arg_sort_iter, arg_sort_iter + n_rows, + src_indices); auto keys = thrust::make_zip_iterator(thrust::make_tuple( colors, nn_colors, (raft::linkage::KeyValuePair *)kvp)); auto vals = thrust::make_zip_iterator(thrust::make_tuple(src_indices)); // get all the colors in contiguous locations so we can map them to warps. - thrust::sort_by_key(thrust::cuda::par.on(stream), keys, keys + n_rows, vals, + thrust::sort_by_key(rmm::exec_policy(stream), keys, keys + n_rows, vals, TupleComp()); } diff --git a/cpp/include/raft/spatial/knn/detail/ann_quantized_faiss.cuh b/cpp/include/raft/spatial/knn/detail/ann_quantized_faiss.cuh index c0345a01e6..43bdf12a38 100644 --- a/cpp/include/raft/spatial/knn/detail/ann_quantized_faiss.cuh +++ b/cpp/include/raft/spatial/knn/detail/ann_quantized_faiss.cuh @@ -39,7 +39,6 @@ #include #include -#include #include #include diff --git a/cpp/include/raft/spatial/knn/detail/knn_brute_force_faiss.cuh b/cpp/include/raft/spatial/knn/detail/knn_brute_force_faiss.cuh index 6db8fb7a8e..84c130b0e4 100644 --- a/cpp/include/raft/spatial/knn/detail/knn_brute_force_faiss.cuh +++ b/cpp/include/raft/spatial/knn/detail/knn_brute_force_faiss.cuh @@ -29,7 +29,6 @@ #include #include -#include #include #include #include diff --git a/cpp/include/raft/spectral/kmeans.hpp b/cpp/include/raft/spectral/kmeans.hpp index 5928c727c6..b6f0105487 100644 --- a/cpp/include/raft/spectral/kmeans.hpp +++ b/cpp/include/raft/spectral/kmeans.hpp @@ -21,7 +21,6 @@ #include #include -#include #include #include #include diff --git a/cpp/include/raft/spectral/modularity_maximization.hpp b/cpp/include/raft/spectral/modularity_maximization.hpp index 1fe7819a7e..fededbfcb4 100644 --- a/cpp/include/raft/spectral/modularity_maximization.hpp +++ b/cpp/include/raft/spectral/modularity_maximization.hpp @@ -20,7 +20,6 @@ #include #include -#include #include #include #include diff --git a/cpp/include/raft/spectral/partition.hpp b/cpp/include/raft/spectral/partition.hpp index a994895886..2df3812a4a 100644 --- a/cpp/include/raft/spectral/partition.hpp +++ b/cpp/include/raft/spectral/partition.hpp @@ -19,7 +19,6 @@ #include #include -#include #include #include #include diff --git a/cpp/include/raft/spectral/spectral_util.hpp b/cpp/include/raft/spectral/spectral_util.hpp index de9ff1917f..c148350c0f 100644 --- a/cpp/include/raft/spectral/spectral_util.hpp +++ b/cpp/include/raft/spectral/spectral_util.hpp @@ -19,7 +19,6 @@ #include #include -#include #include #include #include diff --git a/cpp/test/eigen_solvers.cu b/cpp/test/eigen_solvers.cu index 8025d8dcd6..ef67d95348 100644 --- a/cpp/test/eigen_solvers.cu +++ b/cpp/test/eigen_solvers.cu @@ -38,7 +38,6 @@ TEST(Raft, EigenSolvers) { index_type nnz = 0; index_type nrows = 0; auto stream = h.get_stream(); - auto t_exe_pol = thrust::cuda::par.on(stream); sparse_matrix_t sm1{h, ro, ci, vs, nrows, nnz}; ASSERT_EQ(nullptr, sm1.row_offsets_); diff --git a/cpp/test/linalg/reduce.cuh b/cpp/test/linalg/reduce.cuh index 18261287cf..86cb4f32b4 100644 --- a/cpp/test/linalg/reduce.cuh +++ b/cpp/test/linalg/reduce.cuh @@ -16,9 +16,9 @@ #include #include -#include #include #include +#include namespace raft { namespace linalg { @@ -51,17 +51,20 @@ void unaryAndGemv(Type *dots, const Type *data, int D, int N, cudaStream_t stream) { //computes a MLCommon unary op on data (squares it), then computes Ax //(A input matrix and x column vector) to sum columns - thrust::device_vector sq(D * N); + rmm::device_uvector sq(D * N, stream); raft::linalg::unaryOp( thrust::raw_pointer_cast(sq.data()), data, D * N, [] __device__(Type v) { return v * v; }, stream); cublasHandle_t handle; CUBLAS_CHECK(cublasCreate(&handle)); - thrust::device_vector ones(N, 1); //column vector [1...1] + rmm::device_uvector ones(N, stream); //column vector [1...1] + raft::linalg::unaryOp( + ones.data(), ones.data(), ones.size(), + [=] __device__(Type input) { return 1; }, stream); Type alpha = 1, beta = 0; - CUBLAS_CHECK(raft::linalg::cublasgemv( - handle, CUBLAS_OP_N, D, N, &alpha, thrust::raw_pointer_cast(sq.data()), D, - thrust::raw_pointer_cast(ones.data()), 1, &beta, dots, 1, stream)); + CUBLAS_CHECK(raft::linalg::cublasgemv(handle, CUBLAS_OP_N, D, N, &alpha, + sq.data(), D, ones.data(), 1, &beta, + dots, 1, stream)); CUDA_CHECK(cudaDeviceSynchronize()); CUBLAS_CHECK(cublasDestroy(handle)); } diff --git a/cpp/test/matrix/matrix.cu b/cpp/test/matrix/matrix.cu index e7da92a136..2d2d9d2057 100644 --- a/cpp/test/matrix/matrix.cu +++ b/cpp/test/matrix/matrix.cu @@ -112,7 +112,7 @@ class MatrixCopyRowsTest : public ::testing::Test { // Init input array thrust::counting_iterator first(0); thrust::device_ptr ptr(input.data()); - thrust::copy(thrust::cuda::par.on(stream), first, first + n_cols * n_rows, + thrust::copy(handle.get_thrust_policy(), first, first + n_cols * n_rows, ptr); } diff --git a/cpp/test/mst.cu b/cpp/test/mst.cu index d7aa76500b..4714fd5eaa 100644 --- a/cpp/test/mst.cu +++ b/cpp/test/mst.cu @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include @@ -127,11 +127,18 @@ class MSTTest v = static_cast((csr_d.offsets.size() / sizeof(vertex_t)) - 1); e = static_cast(csr_d.indices.size() / sizeof(edge_t)); - rmm::device_vector mst_src(2 * v - 2, - std::numeric_limits::max()); - rmm::device_vector mst_dst(2 * v - 2, - std::numeric_limits::max()); - rmm::device_vector color(v, 0); + rmm::device_uvector mst_src(2 * v - 2, handle.get_stream()); + rmm::device_uvector mst_dst(2 * v - 2, handle.get_stream()); + rmm::device_uvector color(v, handle.get_stream()); + + CUDA_CHECK( + cudaMemsetAsync(mst_src.data(), std::numeric_limits::max(), + mst_src.size() * sizeof(vertex_t), handle.get_stream())); + CUDA_CHECK( + cudaMemsetAsync(mst_dst.data(), std::numeric_limits::max(), + mst_dst.size() * sizeof(vertex_t), handle.get_stream())); + CUDA_CHECK(cudaMemsetAsync(color.data(), 0, color.size() * sizeof(vertex_t), + handle.get_stream())); vertex_t *color_ptr = thrust::raw_pointer_cast(color.data()); @@ -214,7 +221,6 @@ class MSTTest protected: MSTTestInput mst_input; CSRDevice csr_d; - rmm::device_vector mst_edge; vertex_t v; edge_t e; int iterations;