From 2279ba871ea2742d4f5f53a8e2ba5b5ccfb69159 Mon Sep 17 00:00:00 2001
From: divyegala
Date: Wed, 13 Oct 2021 17:49:10 -0700
Subject: [PATCH 01/18] applying raft handle updates

---
 cpp/bench/sg/benchmark.cuh                    |   4 +-
 cpp/cmake/thirdparty/get_raft.cmake           |   4 +-
 cpp/examples/dbscan/dbscan_example.cpp        |   4 +-
 cpp/examples/kmeans/kmeans_example.cpp        |   4 +-
 cpp/include/cuml/cuml_api.h                   |  11 +-
 cpp/src/common/cumlHandle.cpp                 |   4 +-
 cpp/src/common/cumlHandle.hpp                 |   3 +-
 cpp/src/common/cuml_api.cpp                   |  26 +--
 cpp/src/knn/knn.cu                            |   7 +-
 cpp/src/knn/knn_api.cpp                       |   2 -
 cpp/src/knn/knn_opg_common.cuh                |  27 +--
 cpp/src/randomforest/randomforest.cuh         |  15 +-
 cpp/src_prims/linalg/lstsq.cuh                |   6 +-
 .../metrics/batched/silhouette_score.cuh      |   9 +-
 cpp/src_prims/selection/knn.cuh               |  45 ++--
 cpp/test/mg/pca.cu                            |   2 +-
 cpp/test/prims/knn_classify.cu                |   6 +-
 cpp/test/prims/knn_regression.cu              |   2 +-
 cpp/test/sg/cd_test.cu                        |  29 ++-
 cpp/test/sg/decisiontree_batchedlevel_algo.cu |   3 +-
 cpp/test/sg/fil_test.cu                       |  18 +-
 cpp/test/sg/handle_test.cu                    |  29 +--
 cpp/test/sg/holtwinters_test.cu               |   3 +-
 cpp/test/sg/lars_test.cu                      |  95 ++++----
 cpp/test/sg/ols.cu                            |  63 +++---
 cpp/test/sg/pca_test.cu                       |  40 ++--
 cpp/test/sg/rf_test.cu                        |  15 +-
 cpp/test/sg/ridge.cu                          |  66 +++---
 cpp/test/sg/sgd.cu                            |  45 ++--
 cpp/test/sg/svc_test.cu                       | 202 +++++++++---------
 cpp/test/sg/tsvd_test.cu                      |  22 +-
 python/cuml/common/base.pyx                   |  10 +-
 python/cuml/ensemble/randomforest_common.pyx  |   4 +-
 .../cuml/linear_model/linear_regression.pyx   |   2 +-
 .../random_projection/random_projection.pyx   |   6 +-
 python/cuml/test/test_base.py                 |   3 +-
 python/cuml/test/test_svm.py                  |   6 +-
 python/cuml/test/utils.py                     |   3 +-
 wiki/cpp/DEVELOPER_GUIDE.md                   |   4 +-
 39 files changed, 401 insertions(+), 448 deletions(-)

diff --git a/cpp/bench/sg/benchmark.cuh b/cpp/bench/sg/benchmark.cuh
index c2cd8a9ce6..5bf4caf4c6 100644
--- a/cpp/bench/sg/benchmark.cuh
+++ b/cpp/bench/sg/benchmark.cuh
@@ -37,9 +37,9 @@ class Fixture : public MLCommon::Bench::Fixture {
 
   void SetUp(const ::benchmark::State& state) override
   {
-    handle.reset(new raft::handle_t(NumStreams));
+    auto stream_pool = std::make_shared<rmm::cuda_stream_pool>(NumStreams);
+    handle.reset(new raft::handle_t{stream, stream_pool});
     MLCommon::Bench::Fixture::SetUp(state);
-    handle->set_stream(stream);
   }
 
   void TearDown(const ::benchmark::State& state) override
diff --git a/cpp/cmake/thirdparty/get_raft.cmake b/cpp/cmake/thirdparty/get_raft.cmake
index b0a053b582..2a24b090d5 100644
--- a/cpp/cmake/thirdparty/get_raft.cmake
+++ b/cpp/cmake/thirdparty/get_raft.cmake
@@ -30,8 +30,8 @@ function(find_and_configure_raft)
                       BUILD_EXPORT_SET    cuml-exports
                       INSTALL_EXPORT_SET  cuml-exports
                       CPM_ARGS
-                      GIT_REPOSITORY      https://github.com/${PKG_FORK}/raft.git
-                      GIT_TAG             ${PKG_PINNED_TAG}
+                      GIT_REPOSITORY      https://github.com/divyegala/raft.git
+                      GIT_TAG             imp-21.10-handle_stream
                       SOURCE_SUBDIR       cpp
                       OPTIONS
                         "BUILD_TESTS OFF"
diff --git a/cpp/examples/dbscan/dbscan_example.cpp b/cpp/examples/dbscan/dbscan_example.cpp
index af7fd5e6bf..2e6adf8c81 100644
--- a/cpp/examples/dbscan/dbscan_example.cpp
+++ b/cpp/examples/dbscan/dbscan_example.cpp
@@ -136,8 +136,6 @@ int main(int argc, char* argv[])
     }
   }
 
-  raft::handle_t handle;
-
   std::vector<float> h_inputData;
 
   if (input == "") {
@@ -177,7 +175,7 @@ int main(int argc, char* argv[])
 
   cudaStream_t stream;
   CUDA_RT_CALL(cudaStreamCreate(&stream));
-  handle.set_stream(stream);
+  raft::handle_t handle{stream};
 
   std::vector<int> h_labels(nRows);
   int* d_labels = nullptr;
diff --git a/cpp/examples/kmeans/kmeans_example.cpp b/cpp/examples/kmeans/kmeans_example.cpp
index 3aa9c20a4c..81eab915cb 100644
--- a/cpp/examples/kmeans/kmeans_example.cpp
+++ b/cpp/examples/kmeans/kmeans_example.cpp
@@ -127,11 +127,9 @@ int main(int argc, char* argv[])
   std::cout << "Run KMeans with k=" << params.n_clusters
             << ", max_iterations=" << params.max_iter << std::endl;
 
-  raft::handle_t handle;
-
   cudaStream_t stream;
   CUDA_RT_CALL(cudaStreamCreate(&stream));
-  handle.set_stream(stream);
+  raft::handle_t handle{stream};
 
   // srcdata size n_samples * n_features
   double* d_srcdata = nullptr;
diff --git a/cpp/include/cuml/cuml_api.h b/cpp/include/cuml/cuml_api.h
index a61e85adfe..e2f94ecace 100644
--- a/cpp/include/cuml/cuml_api.h
+++ b/cpp/include/cuml/cuml_api.h
@@ -53,9 +53,10 @@ const char* cumlGetErrorString(cumlError_t error);
  * @brief Creates a cumlHandle_t
 *
  * @param[inout] handle pointer to the handle to create.
+ * @param[in] stream the stream to which cuML work should be ordered.
  * @return CUML_SUCCESS on success, @todo: add more error codes
 */
-cumlError_t cumlCreate(cumlHandle_t* handle);
+cumlError_t cumlCreate(cumlHandle_t* handle, cudaStream_t stream);
 
 /**
  * @brief sets the stream to which all cuML work issued via the passed handle should be ordered.
 *
@@ -64,14 +65,6 @@ cumlError_t cumlCreate(cumlHandle_t* handle);
  * @param[in] stream the stream to which cuML work should be ordered.
  * @return CUML_SUCCESS on success, @todo: add more error codes
 */
-cumlError_t cumlSetStream(cumlHandle_t handle, cudaStream_t stream);
-/**
- * @brief gets the stream to which all cuML work issued via the passed handle should be ordered.
- *
- * @param[inout] handle handle to get the stream of.
- * @param[out] stream pointer to the stream to which cuML work should be ordered.
- * @return CUML_SUCCESS on success, @todo: add more error codes
- */
 cumlError_t cumlGetStream(cumlHandle_t handle, cudaStream_t* stream);
 
 /**
diff --git a/cpp/src/common/cumlHandle.cpp b/cpp/src/common/cumlHandle.cpp
index 3295db6a0a..c39b7b7688 100644
--- a/cpp/src/common/cumlHandle.cpp
+++ b/cpp/src/common/cumlHandle.cpp
@@ -28,12 +28,12 @@ namespace ML {
 
 HandleMap handleMap;
 
-std::pair<cumlHandle_t, cumlError_t> HandleMap::createAndInsertHandle()
+std::pair<cumlHandle_t, cumlError_t> HandleMap::createAndInsertHandle(cudaStream_t stream)
 {
   cumlError_t status = CUML_SUCCESS;
   cumlHandle_t chosen_handle;
   try {
-    auto handle_ptr = new raft::handle_t();
+    auto handle_ptr = new raft::handle_t{stream};
     bool inserted;
     {
       std::lock_guard<std::mutex> guard(_mapMutex);
diff --git a/cpp/src/common/cumlHandle.hpp b/cpp/src/common/cumlHandle.hpp
index 4b0a4793fc..3b142f592f 100644
--- a/cpp/src/common/cumlHandle.hpp
+++ b/cpp/src/common/cumlHandle.hpp
@@ -33,10 +33,11 @@ class HandleMap {
   /**
    * @brief Creates new handle object with associated handle ID and insert into map.
   *
+   * @param[in] stream the stream to which cuML work should be ordered.
    * @return std::pair with handle and error code. If error code is not CUML_SUCCESS
    *         the handle is INVALID_HANDLE.
   */
-  std::pair<cumlHandle_t, cumlError_t> createAndInsertHandle();
+  std::pair<cumlHandle_t, cumlError_t> createAndInsertHandle(cudaStream_t stream);
 
   /**
    * @brief Lookup pointer to handle object for handle ID in map.
diff --git a/cpp/src/common/cuml_api.cpp b/cpp/src/common/cuml_api.cpp
index 6284a8aa6f..6a316947e0 100644
--- a/cpp/src/common/cuml_api.cpp
+++ b/cpp/src/common/cuml_api.cpp
@@ -89,32 +89,10 @@ extern "C" const char* cumlGetErrorString(cumlError_t error)
   }
 }
 
-extern "C" cumlError_t cumlCreate(cumlHandle_t* handle)
+extern "C" cumlError_t cumlCreate(cumlHandle_t* handle, cudaStream_t stream)
 {
   cumlError_t status;
-  std::tie(*handle, status) = ML::handleMap.createAndInsertHandle();
-  return status;
-}
-
-extern "C" cumlError_t cumlSetStream(cumlHandle_t handle, cudaStream_t stream)
-{
-  cumlError_t status;
-  raft::handle_t* handle_ptr;
-  std::tie(handle_ptr, status) = ML::handleMap.lookupHandlePointer(handle);
-  if (status == CUML_SUCCESS) {
-    try {
-      handle_ptr->set_stream(stream);
-    }
-    // TODO: Implement this
-    // catch (const MLCommon::Exception& e)
-    //{
-    //  //log e.what()?
-    //  status = e.getErrorCode();
-    //}
-    catch (...) {
-      status = CUML_ERROR_UNKNOWN;
-    }
-  }
+  std::tie(*handle, status) = ML::handleMap.createAndInsertHandle(stream);
   return status;
 }
diff --git a/cpp/src/knn/knn.cu b/cpp/src/knn/knn.cu
index 0b9fa1640d..61819cf4ba 100644
--- a/cpp/src/knn/knn.cu
+++ b/cpp/src/knn/knn.cu
@@ -128,7 +128,7 @@ void knn_classify(raft::handle_t& handle,
   }
 
   MLCommon::Selection::knn_classify(
-    out, knn_indices, y, n_index_rows, n_query_rows, k, uniq_labels, n_unique, stream);
+    handle, out, knn_indices, y, n_index_rows, n_query_rows, k, uniq_labels, n_unique);
 }
 
 void knn_regress(raft::handle_t& handle,
@@ -139,8 +139,7 @@ void knn_regress(raft::handle_t& handle,
                  size_t n_query_rows,
                  int k)
 {
-  MLCommon::Selection::knn_regress(
-    out, knn_indices, y, n_index_rows, n_query_rows, k, handle.get_stream());
+  MLCommon::Selection::knn_regress(handle, out, knn_indices, y, n_index_rows, n_query_rows, k);
 }
 
 void knn_class_proba(raft::handle_t& handle,
@@ -164,7 +163,7 @@ void knn_class_proba(raft::handle_t& handle,
   }
 
   MLCommon::Selection::class_probs(
-    out, knn_indices, y, n_index_rows, n_query_rows, k, uniq_labels, n_unique, stream);
+    handle, out, knn_indices, y, n_index_rows, n_query_rows, k, uniq_labels, n_unique);
 }
 
 };  // END NAMESPACE ML
diff --git a/cpp/src/knn/knn_api.cpp b/cpp/src/knn/knn_api.cpp
index 701e5c070f..a70f7a4653 100644
--- a/cpp/src/knn/knn_api.cpp
+++ b/cpp/src/knn/knn_api.cpp
@@ -71,8 +71,6 @@ cumlError_t knn_search(const cumlHandle_t handle,
     raft::distance::DistanceType metric_distance_type =
       static_cast<raft::distance::DistanceType>(metric_type);
 
-    std::vector<cudaStream_t> int_streams = handle_ptr->get_internal_streams();
-
     std::vector<float*> input_vec(n_params);
     std::vector<int> sizes_vec(n_params);
     for (int i = 0; i < n_params; i++) {
diff --git a/cpp/src/knn/knn_opg_common.cuh b/cpp/src/knn/knn_opg_common.cuh
index 606e953016..9854501d6c 100644
--- a/cpp/src/knn/knn_opg_common.cuh
+++ b/cpp/src/knn/knn_opg_common.cuh
@@ -910,15 +910,8 @@ void perform_local_operation(opg_knn_param<in_t, ind_t, dist_t, out_t>& params,
     y[o] = reinterpret_cast<out_t*>(labels) + (o * n_labels);
   }
 
-  MLCommon::Selection::knn_regress(outputs,
-                                   nullptr,
-                                   y,
-                                   n_labels,
-                                   batch_size,
-                                   params.k,
-                                   handle.get_stream(),
-                                   handle.get_internal_streams().data(),
-                                   handle.get_num_internal_streams());
+  MLCommon::Selection::knn_regress(
+    handle, outputs, nullptr, y, n_labels, batch_size, params.k);
 }
 
 /*!
@@ -952,30 +945,26 @@ void perform_local_operation(opg_knn_param<in_t, ind_t, dist_t, out_t>& params,
 
   switch (params.knn_op) {
     case knn_operation::classification:
-      MLCommon::Selection::knn_classify<32, true>(outputs,
+      MLCommon::Selection::knn_classify<32, true>(handle,
+                                                  outputs,
                                                   nullptr,
                                                   y,
                                                   n_labels,
                                                   batch_size,
                                                   params.k,
                                                   *(params.uniq_labels),
-                                                  *(params.n_unique),
-                                                  handle.get_stream(),
-                                                  handle.get_internal_streams().data(),
-                                                  handle.get_num_internal_streams());
+                                                  *(params.n_unique));
       break;
     case knn_operation::class_proba:
-      MLCommon::Selection::class_probs<32, true>(probas_with_offsets,
+      MLCommon::Selection::class_probs<32, true>(handle,
+                                                 probas_with_offsets,
                                                  nullptr,
                                                  y,
                                                  n_labels,
                                                  batch_size,
                                                  params.k,
                                                  *(params.uniq_labels),
-                                                 *(params.n_unique),
-                                                 handle.get_stream(),
-                                                 handle.get_internal_streams().data(),
-                                                 handle.get_num_internal_streams());
+                                                 *(params.n_unique));
       break;
     default: CUML_LOG_DEBUG("FAILURE!");
   }
diff --git a/cpp/src/randomforest/randomforest.cuh b/cpp/src/randomforest/randomforest.cuh
index c15331080c..b107af9c37 100644
--- a/cpp/src/randomforest/randomforest.cuh
+++ b/cpp/src/randomforest/randomforest.cuh
@@ -140,10 +140,10 @@ class RandomForest {
       n_sampled_rows = n_rows;
     }
     int n_streams = this->rf_params.n_streams;
-    ASSERT(n_streams <= handle.get_num_internal_streams(),
-           "rf_params.n_streams (=%d) should be <= raft::handle_t.n_streams (=%d)",
+    ASSERT(static_cast<std::size_t>(n_streams) <= handle.get_stream_pool_size(),
+           "rf_params.n_streams (=%d) should be <= raft::handle_t.n_streams (=%lu)",
            n_streams,
-           handle.get_num_internal_streams());
+           handle.get_stream_pool_size());
 
     // Select n_sampled_rows (with replacement) numbers from [0, n_rows) per tree.
     // selected_rows: randomly generated IDs for bootstrapped samples (w/ replacement); a device
     // constructor
     std::deque<rmm::device_uvector<int>> selected_rows;
     for (int i = 0; i < n_streams; i++) {
-      selected_rows.emplace_back(n_sampled_rows, handle.get_internal_stream(i));
+      selected_rows.emplace_back(n_sampled_rows, handle.get_stream_from_stream_pool(i));
     }
 
     auto global_quantiles =
 
 #pragma omp parallel for num_threads(n_streams)
     for (int i = 0; i < this->rf_params.n_trees; i++) {
       int stream_id = omp_get_thread_num();
-      auto s        = handle.get_internal_stream(stream_id);
+      auto s        = handle.get_stream_from_stream_pool(stream_id);
 
       this->get_row_sample(i, n_rows, &selected_rows[stream_id], s);
                         i);
     }
     // Cleanup
-    for (int i = 0; i < n_streams; i++) {
-      auto s = handle.get_internal_stream(i);
-      CUDA_CHECK(cudaStreamSynchronize(s));
-    }
+    handle.sync_stream_pool();
     CUDA_CHECK(cudaStreamSynchronize(handle.get_stream()));
     ML::POP_RANGE();
   }
diff --git a/cpp/src_prims/linalg/lstsq.cuh b/cpp/src_prims/linalg/lstsq.cuh
index abc143003e..32e8775c45 100644
--- a/cpp/src_prims/linalg/lstsq.cuh
+++ b/cpp/src_prims/linalg/lstsq.cuh
@@ -255,15 +255,15 @@ void lstsqEig(const raft::handle_t& handle,
   rmm::cuda_stream_view multAbStream = mainStream;
   bool concurrent                    = false;
   {
-    int sp_size = handle.get_num_internal_streams();
+    int sp_size = handle.get_stream_pool_size();
     if (sp_size > 0) {
-      multAbStream = handle.get_internal_stream_view(0);
+      multAbStream = handle.get_stream_from_stream_pool(0);
       // check if the two streams can run concurrently
       if (!are_implicitly_synchronized(mainStream, multAbStream)) {
         concurrent = true;
       } else if (sp_size > 1) {
         mainStream   = multAbStream;
-        multAbStream = handle.get_internal_stream_view(1);
+        multAbStream = handle.get_stream_from_stream_pool(1);
         concurrent   = true;
       }
     }
diff --git a/cpp/src_prims/metrics/batched/silhouette_score.cuh b/cpp/src_prims/metrics/batched/silhouette_score.cuh
index 7abc21a738..0cd2a1d75f 100644
--- a/cpp/src_prims/metrics/batched/silhouette_score.cuh
+++ b/cpp/src_prims/metrics/batched/silhouette_score.cuh
@@ -214,7 +214,7 @@ value_t silhouette_score(
   detail::fill_b_kernel<<<grid, blk, 0, stream>>>(
     b_ptr, y, n_rows, n_labels, cluster_counts.data());
 
-  handle.wait_on_user_stream();
+  handle.wait_stream_pool_on_stream();
 
   auto n_iters = 0;
 
@@ -222,10 +222,7 @@ value_t silhouette_score(
     for (value_idx j = 0; j < n_rows; j += chunk) {
       ++n_iters;
 
-      auto chunk_stream = raft::select_stream(stream,
-                                              handle.get_internal_streams().data(),
-                                              handle.get_num_internal_streams(),
-                                              i + chunk * j);
+      auto chunk_stream = handle.get_next_usable_stream(i + chunk * j);
 
       auto* left_begin  = X + (i * n_cols);
       auto* right_begin = X + (j * n_cols);
@@ -251,7 +248,7 @@ value_t silhouette_score(
     }
   }
 
-  handle.wait_on_internal_streams();
+  handle.sync_stream_pool();
 
   // calculating row-wise minimum in b
   // this prim only supports int indices for now
diff --git a/cpp/src_prims/selection/knn.cuh b/cpp/src_prims/selection/knn.cuh
index 1b03835376..d6b26dfdff 100644
--- a/cpp/src_prims/selection/knn.cuh
+++ b/cpp/src_prims/selection/knn.cuh
@@ -169,20 +169,18 @@ __global__ void regress_avg_kernel(LabelType* out,
 * the user_stream is used.
 */
 template <int TPB_X = 32, bool precomp_lbls = false>
-void class_probs(std::vector<float*>& out,
+void class_probs(const raft::handle_t& handle,
+                 std::vector<float*>& out,
                  const int64_t* knn_indices,
                  std::vector<int*>& y,
                  std::size_t n_index_rows,
                  std::size_t n_query_rows,
                  int k,
                  std::vector<int*>& uniq_labels,
-                 std::vector<int>& n_unique,
-                 cudaStream_t user_stream,
-                 cudaStream_t* int_streams = nullptr,
-                 int n_int_streams         = 0)
+                 std::vector<int>& n_unique)
 {
   for (std::size_t i = 0; i < y.size(); i++) {
-    cudaStream_t stream = raft::select_stream(user_stream, int_streams, n_int_streams, i);
+    cudaStream_t stream = handle.get_next_usable_stream(i);
 
     int n_unique_labels = n_unique[i];
     size_t cur_size     = n_query_rows * n_unique_labels;
@@ -246,17 +244,15 @@ void class_probs(std::vector<float*>& out,
 * the user_stream is used.
 */
 template <int TPB_X = 32, bool precomp_lbls = false>
-void knn_classify(int* out,
+void knn_classify(const raft::handle_t& handle,
+                  int* out,
                   const int64_t* knn_indices,
                   std::vector<int*>& y,
                   std::size_t n_index_rows,
                   std::size_t n_query_rows,
                   int k,
                   std::vector<int*>& uniq_labels,
-                  std::vector<int>& n_unique,
-                  cudaStream_t user_stream,
-                  cudaStream_t* int_streams = nullptr,
-                  int n_int_streams         = 0)
+                  std::vector<int>& n_unique)
 {
   std::vector<float*> probs;
   std::vector<rmm::device_uvector<float>> tmp_probs;
 
   // allocate temporary memory
   for (std::size_t i = 0; i < n_unique.size(); i++) {
     int size = n_unique[i];
 
-    cudaStream_t stream = raft::select_stream(user_stream, int_streams, n_int_streams, i);
+    cudaStream_t stream = handle.get_next_usable_stream(i);
 
     tmp_probs.emplace_back(n_query_rows * size, stream);
     probs.push_back(tmp_probs.back().data());
   }
 
  /**
   * Note: Since class_probs will use the same round robin strategy for distributing
   * work to the streams, we don't need to explicitly synchronize the streams here.
  */
-  class_probs<32, precomp_lbls>(probs,
-                                knn_indices,
-                                y,
-                                n_index_rows,
-                                n_query_rows,
-                                k,
-                                uniq_labels,
-                                n_unique,
-                                user_stream,
-                                int_streams,
-                                n_int_streams);
+  class_probs<32, precomp_lbls>(
+    handle, probs, knn_indices, y, n_index_rows, n_query_rows, k, uniq_labels, n_unique);
 
   dim3 grid(raft::ceildiv(n_query_rows, static_cast<size_t>(TPB_X)), 1, 1);
   dim3 blk(TPB_X, 1, 1);
 
   for (std::size_t i = 0; i < y.size(); i++) {
-    cudaStream_t stream = raft::select_stream(user_stream, int_streams, n_int_streams, i);
+    cudaStream_t stream = handle.get_next_usable_stream(i);
 
     int n_unique_labels = n_unique[i];
 
@@ -334,21 +321,19 @@
 */
 template <typename ValType, int TPB_X = 32, bool precomp_lbls = false>
-void knn_regress(ValType* out,
+void knn_regress(const raft::handle_t& handle,
+                 ValType* out,
                  const int64_t* knn_indices,
                  const std::vector<ValType*>& y,
                  size_t n_index_rows,
                  size_t n_query_rows,
-                 int k,
-                 cudaStream_t user_stream,
-                 cudaStream_t* int_streams = nullptr,
-                 int n_int_streams         = 0)
+                 int k)
 {
  /**
   * Vote average regression value
  */
   for (std::size_t i = 0; i < y.size(); i++) {
-    cudaStream_t stream = raft::select_stream(user_stream, int_streams, n_int_streams, i);
+    cudaStream_t stream = handle.get_next_usable_stream(i);
 
     regress_avg_kernel<ValType>
       <<<raft::ceildiv(n_query_rows, static_cast<size_t>(TPB_X)), TPB_X, 0, stream>>>(
diff --git a/cpp/test/mg/pca.cu b/cpp/test/mg/pca.cu
index c5cf1d9bac..32434417e9 100644
--- a/cpp/test/mg/pca.cu
+++ b/cpp/test/mg/pca.cu
@@ -79,7 +79,7 @@ class PCAOpgTest : public testing::TestWithParam<int> {
     std::vector<Matrix::Data<T>*> inParts;
     Matrix::opg::allocate(handle, inParts, desc, myRank, stream);
     Matrix::opg::randomize(handle, r, inParts, desc, myRank, stream, T(10.0), T(20.0));
-    handle.wait_on_user_stream();
+    handle.sync_stream();
 
     prmsPCA.n_rows = params.M;
     prmsPCA.n_cols = params.N;
diff --git a/cpp/test/prims/knn_classify.cu b/cpp/test/prims/knn_classify.cu
index c9be1e0d66..4ac9499480 100644
--- a/cpp/test/prims/knn_classify.cu
+++ b/cpp/test/prims/knn_classify.cu
@@ -92,15 +92,15 @@ class KNNClassifyTest : public ::testing::TestWithParam<KNNClassifyInputs> {
     std::vector<int> n_unique;
     n_unique.push_back(n_classes);
 
-    knn_classify(pred_labels,
+    knn_classify(handle,
+                 pred_labels,
                  knn_indices,
                  y,
                  params.rows,
                  params.rows,
                  params.k,
                  uniq_labels,
-                 n_unique,
-                 stream);
+                 n_unique);
 
     CUDA_CHECK(cudaStreamSynchronize(stream));
   }
diff --git a/cpp/test/prims/knn_regression.cu b/cpp/test/prims/knn_regression.cu
index 3de9d371ca..68c446bd5e 100644
--- a/cpp/test/prims/knn_regression.cu
+++ b/cpp/test/prims/knn_regression.cu
@@ -119,7 +119,7 @@ class KNNRegressionTest : public ::testing::TestWithParam<KNNRegressionInputs> {
     std::vector<float*> y;
     y.push_back(train_labels);
 
-    knn_regress(pred_labels, knn_indices, y, params.rows, params.rows, params.k, stream);
+    knn_regress(handle, pred_labels, knn_indices, y, params.rows, params.rows, params.k);
 
     CUDA_CHECK(cudaStreamSynchronize(stream));
   }
diff --git a/cpp/test/sg/cd_test.cu b/cpp/test/sg/cd_test.cu
index 8ef1ce27e7..b3918c430a 100644
--- a/cpp/test/sg/cd_test.cu
+++ b/cpp/test/sg/cd_test.cu
@@ -38,6 +38,8 @@ class CdTest : public ::testing::TestWithParam<CdInputs<T>> {
 protected:
   void lasso()
   {
+    auto stream = handle.get_stream();
+
     params = ::testing::TestWithParam<CdInputs<T>>::GetParam();
 
     int len = params.n_row * params.n_col;
@@ -158,12 +160,7 @@ class CdTest : public ::testing::TestWithParam<CdInputs<T>> {
                stream);
   }
 
-  void SetUp() override
-  {
-    CUDA_CHECK(cudaStreamCreate(&stream));
-    handle.set_stream(stream);
-    lasso();
-  }
+  void SetUp() override { lasso(); }
 
   void TearDown() override
   {
@@ -177,7 +174,6 @@ class CdTest : public ::testing::TestWithParam<CdInputs<T>> {
     CUDA_CHECK(cudaFree(coef3_ref));
     CUDA_CHECK(cudaFree(coef4));
     CUDA_CHECK(cudaFree(coef4_ref));
-    CUDA_CHECK(cudaStreamDestroy(stream));
   }
 
 protected:
@@ -187,7 +183,6 @@ class CdTest : public ::testing::TestWithParam<CdInputs<T>> {
   T *coef3, *coef3_ref;
   T *coef4, *coef4_ref;
   T intercept, intercept2;
-  cudaStream_t stream = 0;
   raft::handle_t handle;
 };
 
 const std::vector<CdInputs<double>> inputsd2 = {{0.01, 4, 2}};
 
 typedef CdTest<float> CdTestF;
 TEST_P(CdTestF, Fit)
 {
-  ASSERT_TRUE(
-    raft::devArrMatch(coef_ref, coef, params.n_col, raft::CompareApproxAbs<float>(params.tol)));
+  auto stream = handle.get_stream();
 
-  ASSERT_TRUE(
-    raft::devArrMatch(coef2_ref, coef2, params.n_col, raft::CompareApproxAbs<float>(params.tol)));
+  ASSERT_TRUE(raft::devArrMatch(
+    coef_ref, coef, params.n_col, raft::CompareApproxAbs<float>(params.tol), stream));
 
-  ASSERT_TRUE(
-    raft::devArrMatch(coef3_ref, coef3, params.n_col, raft::CompareApproxAbs<float>(params.tol)));
+  ASSERT_TRUE(raft::devArrMatch(
+    coef2_ref, coef2, params.n_col, raft::CompareApproxAbs<float>(params.tol), stream));
 
-  ASSERT_TRUE(
-    raft::devArrMatch(coef4_ref, coef4, params.n_col, raft::CompareApproxAbs<float>(params.tol)));
+  ASSERT_TRUE(raft::devArrMatch(
+    coef3_ref, coef3, params.n_col, raft::CompareApproxAbs<float>(params.tol), stream));
+
+  ASSERT_TRUE(raft::devArrMatch(
+    coef4_ref, coef4, params.n_col, raft::CompareApproxAbs<float>(params.tol), stream));
 }
 
 typedef CdTest<double> CdTestD;
diff --git a/cpp/test/sg/decisiontree_batchedlevel_algo.cu b/cpp/test/sg/decisiontree_batchedlevel_algo.cu
index a4f4663be0..c9e500eabe 100644
--- a/cpp/test/sg/decisiontree_batchedlevel_algo.cu
+++ b/cpp/test/sg/decisiontree_batchedlevel_algo.cu
@@ -46,9 +46,8 @@ class DtBaseTest : public ::testing::TestWithParam<T> {
   void SetUp()
   {
     inparams = ::testing::TestWithParam<T>::GetParam();
-    handle.reset(new raft::handle_t);
     CUDA_CHECK(cudaStreamCreate(&stream));
-    handle->set_stream(stream);
+    handle.reset(new raft::handle_t{stream});
     set_tree_params(params,
                     inparams.max_depth,
                     1 << inparams.max_depth,
diff --git a/cpp/test/sg/fil_test.cu b/cpp/test/sg/fil_test.cu
index 293222667e..4ef8eac6a7 100644
--- a/cpp/test/sg/fil_test.cu
+++ b/cpp/test/sg/fil_test.cu
@@ -214,8 +214,6 @@ class BaseFilTest : public testing::TestWithParam<FilTestParams> {
   {
     // setup
     ps = testing::TestWithParam<FilTestParams>::GetParam();
-    CUDA_CHECK(cudaStreamCreate(&stream));
-    handle.set_stream(stream);
 
     generate_forest();
     generate_data();
@@ -236,6 +234,8 @@ class BaseFilTest : public testing::TestWithParam<FilTestParams> {
 
   void generate_forest()
   {
+    auto stream = handle.get_stream();
+
     size_t num_nodes = forest_num_nodes();
 
     // helper data
@@ -321,7 +321,7 @@ class BaseFilTest : public testing::TestWithParam<FilTestParams> {
     raft::update_host(def_lefts_h, def_lefts_d, num_nodes, stream);
     raft::update_host(is_leafs_h, is_leafs_d, num_nodes, stream);
     raft::update_host(is_categoricals_h.data(), is_categoricals_d.data(), num_nodes, stream);
-    CUDA_CHECK(cudaStreamSynchronize(stream));
+    handle.sync_stream();
 
     // mark leaves
     for (int i = 0; i < ps.num_trees; ++i) {
@@ -409,6 +409,7 @@ class BaseFilTest : public testing::TestWithParam<FilTestParams> {
 
   void generate_data()
   {
+    auto stream = handle.get_stream();
     // allocate arrays
     size_t num_data = ps.num_rows * ps.num_cols;
     raft::allocate(data_d, num_data, stream);
@@ -434,7 +435,7 @@ class BaseFilTest : public testing::TestWithParam<FilTestParams> {
     // copy to host
     data_h.resize(num_data);
     raft::update_host(data_h.data(), data_d, num_data, stream);
-    CUDA_CHECK(cudaStreamSynchronize(stream));
+    handle.sync_stream();
 
     // clean up
     CUDA_CHECK(cudaFree(mask_d));
@@ -470,6 +471,7 @@ class BaseFilTest : public testing::TestWithParam<FilTestParams> {
 
   void predict_on_cpu()
   {
+    auto stream = handle.get_stream();
     // predict on host
     std::vector<float> want_preds_h(ps.num_preds_outputs());
     want_proba_h.resize(ps.num_proba_outputs());
@@ -554,13 +556,14 @@ class BaseFilTest : public testing::TestWithParam<FilTestParams> {
     raft::allocate(want_proba_d, ps.num_proba_outputs(), stream);
     raft::update_device(want_preds_d, want_preds_h.data(), ps.num_preds_outputs(), stream);
     raft::update_device(want_proba_d, want_proba_h.data(), ps.num_proba_outputs(), stream);
-    CUDA_CHECK(cudaStreamSynchronize(stream));
+    handle.sync_stream();
   }
 
   virtual void init_forest(fil::forest_t* pforest) = 0;
 
   void predict_on_gpu()
   {
+    auto stream = handle.get_stream();
     fil::forest_t forest = nullptr;
     init_forest(&forest);
 
@@ -569,7 +572,7 @@ class BaseFilTest : public testing::TestWithParam<FilTestParams> {
     raft::allocate(proba_d, ps.num_proba_outputs(), stream);
     fil::predict(handle, forest, preds_d, data_d, ps.num_rows);
     fil::predict(handle, forest, proba_d, data_d, ps.num_rows, true);
-    CUDA_CHECK(cudaStreamSynchronize(stream));
+    handle.sync_stream();
 
     // cleanup
     fil::free(handle, forest);
@@ -577,6 +580,7 @@ class BaseFilTest : public testing::TestWithParam<FilTestParams> {
 
   void compare()
   {
+    auto stream = handle.get_stream();
     ASSERT_TRUE(raft::devArrMatch(want_proba_d,
                                   proba_d,
                                   ps.num_proba_outputs(),
@@ -627,7 +631,6 @@ class BaseFilTest : public testing::TestWithParam<FilTestParams> {
   rmm::device_uvector<int> max_matching_cat_d = rmm::device_uvector<int>(0, cudaStream_t());
 
   // parameters
-  cudaStream_t stream = 0;
   raft::handle_t handle;
   FilTestParams ps;
 };
@@ -817,6 +820,7 @@ class TreeliteFilTest : public BaseFilTest {
 
   void init_forest_impl(fil::forest_t* pforest, fil::storage_type_t storage_type)
   {
+    auto stream             = handle.get_stream();
    bool random_forest_flag = (ps.output & fil::output_t::AVG) != 0;
    int treelite_num_classes =
      ps.leaf_algo == fil::leaf_algo_t::FLOAT_UNARY_BINARY ? 1 : ps.num_classes;
diff --git a/cpp/test/sg/handle_test.cu b/cpp/test/sg/handle_test.cu
index 4600443d38..4842ea3f26 100644
--- a/cpp/test/sg/handle_test.cu
+++ b/cpp/test/sg/handle_test.cu
@@ -21,7 +21,9 @@
 TEST(HandleTest, CreateHandleAndDestroy)
 {
   cumlHandle_t handle;
-  cumlError_t status = cumlCreate(&handle);
+  cudaStream_t stream;
+  cudaStreamCreate(&stream);
+  cumlError_t status = cumlCreate(&handle, stream);
   EXPECT_EQ(CUML_SUCCESS, status);
 
   status = cumlDestroy(handle);
@@ -31,7 +33,9 @@ TEST(HandleTest, CreateHandleAndDestroy)
 TEST(HandleTest, DoubleDestoryFails)
 {
   cumlHandle_t handle;
-  cumlError_t status = cumlCreate(&handle);
+  cudaStream_t stream;
+  cudaStreamCreate(&stream);
+  cumlError_t status = cumlCreate(&handle, stream);
   EXPECT_EQ(CUML_SUCCESS, status);
 
   status = cumlDestroy(handle);
@@ -39,23 +43,4 @@ TEST(HandleTest, DoubleDestoryFails)
   // handle is destroyed
   status = cumlDestroy(handle);
   EXPECT_EQ(CUML_INVALID_HANDLE, status);
-}
-
-TEST(HandleTest, set_stream)
-{
-  cumlHandle_t handle;
-  cumlError_t status = cumlCreate(&handle);
-  EXPECT_EQ(CUML_SUCCESS, status);
-
-  status = cumlSetStream(handle, 0);
-  EXPECT_EQ(CUML_SUCCESS, status);
-
-  status = cumlDestroy(handle);
-  EXPECT_EQ(CUML_SUCCESS, status);
-}
-
-TEST(HandleTest, SetStreamInvalidHandle)
-{
-  cumlHandle_t handle = 12346;
-  EXPECT_EQ(CUML_INVALID_HANDLE, cumlSetStream(handle, 0));
-}
+}
\ No newline at end of file
diff --git a/cpp/test/sg/holtwinters_test.cu b/cpp/test/sg/holtwinters_test.cu
index bcd05db27d..be62ad235e 100644
--- a/cpp/test/sg/holtwinters_test.cu
+++ b/cpp/test/sg/holtwinters_test.cu
@@ -79,8 +79,7 @@ class HoltWintersTest : public ::testing::TestWithParam<HoltWintersInputs<T>> {
     raft::allocate(data, batch_size * n, stream);
     raft::update_device(data, dataset_h, batch_size * n, stream);
 
-    raft::handle_t handle;
-    handle.set_stream(stream);
+    raft::handle_t handle{stream};
 
     ML::HoltWinters::fit(handle,
                         n,
diff --git a/cpp/test/sg/lars_test.cu b/cpp/test/sg/lars_test.cu
index 70054c25f6..615594aeb7 100644
--- a/cpp/test/sg/lars_test.cu
+++ b/cpp/test/sg/lars_test.cu
@@ -41,18 +41,16 @@ class LarsTest : public ::testing::Test {
       ws(n_cols, handle.get_stream()),
       A(1, handle.get_stream())
   {
-    CUDA_CHECK(cudaStreamCreate(&stream));
-    handle.set_stream(stream);
+    auto stream = handle.get_stream();
     raft::update_device(cor.data(), cor_host, n_cols, stream);
     raft::update_device(X.data(), X_host, n_cols * n_rows, stream);
     raft::update_device(G.data(), G_host, n_cols * n_cols, stream);
     raft::update_device(sign.data(), sign_host, n_cols, stream);
   }
 
-  void TearDown() override { CUDA_CHECK(cudaStreamDestroy(stream)); }
-
   void testSelectMostCorrelated()
   {
+    auto stream = handle.get_stream();
     math_t cj;
     int idx;
     rmm::device_uvector<math_t> workspace(n_cols, stream);
@@ -64,6 +62,7 @@ class LarsTest : public ::testing::Test {
 
   void testMoveToActive()
   {
+    auto stream = handle.get_stream();
     ML::Solver::Lars::moveToActive(handle.get_cublas_handle(),
                                    &n_active,
                                    3,
@@ -79,10 +78,12 @@ class LarsTest : public ::testing::Test {
                                    stream);
 
     EXPECT_EQ(n_active, 3);
-    EXPECT_TRUE(raft::devArrMatchHost(cor_exp, cor.data(), n_cols, raft::Compare<math_t>()));
-    EXPECT_TRUE(raft::devArrMatchHost(G_exp, G.data(), n_cols * n_cols, raft::Compare<math_t>()));
     EXPECT_TRUE(
-      raft::devArrMatch((math_t)1.0, sign.data() + n_active - 1, 1, raft::Compare<math_t>()));
+      raft::devArrMatchHost(cor_exp, cor.data(), n_cols, raft::Compare<math_t>(), stream));
+    EXPECT_TRUE(
+      raft::devArrMatchHost(G_exp, G.data(), n_cols * n_cols, raft::Compare<math_t>(), stream));
+    EXPECT_TRUE(raft::devArrMatch(
+      (math_t)1.0, sign.data() + n_active - 1, 1, raft::Compare<math_t>(), stream));
 
     // Do it again with G == nullptr to test if X is properly changed
     n_active = 2;
@@ -99,11 +100,13 @@ class LarsTest : public ::testing::Test {
                                    n_cols,
                                    sign.data(),
                                    stream);
-    EXPECT_TRUE(raft::devArrMatchHost(X_exp, X.data(), n_rows * n_cols, raft::Compare<math_t>()));
+    EXPECT_TRUE(
+      raft::devArrMatchHost(X_exp, X.data(), n_rows * n_cols, raft::Compare<math_t>(), stream));
   }
 
   void calcUExp(math_t* G, int n_cols, math_t* U_dev_exp)
   {
+    auto stream = handle.get_stream();
     rmm::device_scalar<int> devInfo(stream);
     rmm::device_uvector<math_t> workspace(0, stream);
     int n_work;
@@ -127,6 +130,7 @@ class LarsTest : public ::testing::Test {
   // Initialize a mix of G and U matrices to test updateCholesky
   void initGU(math_t* GU, math_t* G, math_t* U, int n_active, bool copy_G)
   {
+    auto stream    = handle.get_stream();
     const int ld_U = n_cols;
     // First we copy over all elements, because the factorization only replaces
    // the upper triangular part. This way it will be easier to compare to the
@@ -143,6 +147,7 @@ class LarsTest : public ::testing::Test {
 
   void testUpdateCholesky()
   {
+    auto stream    = handle.get_stream();
     const int ld_X = n_rows;
     const int ld_G = n_cols;
     const int ld_U = ld_G;
@@ -170,7 +175,7 @@ class LarsTest : public ::testing::Test {
                                      eps,
                                      stream);
     EXPECT_TRUE(raft::devArrMatch(
-      U_dev_exp.data(), U.data(), n_cols * n_cols, raft::CompareApprox<math_t>(1e-5)));
+      U_dev_exp.data(), U.data(), n_cols * n_cols, raft::CompareApprox<math_t>(1e-5), stream));
 
     // Next test where G and U are separate arrays
     initGU(U.data(), G.data(), U_dev_exp.data(), n_active, false);
@@ -188,7 +193,7 @@ class LarsTest : public ::testing::Test {
                                      eps,
                                      stream);
     EXPECT_TRUE(raft::devArrMatch(
-      U_dev_exp.data(), U.data(), n_cols * n_cols, raft::CompareApprox<math_t>(1e-5)));
+      U_dev_exp.data(), U.data(), n_cols * n_cols, raft::CompareApprox<math_t>(1e-5), stream));
 
     // Third test without Gram matrix.
     initGU(U.data(), G.data(), U_dev_exp.data(), n_active, false);
@@ -206,11 +211,12 @@ class LarsTest : public ::testing::Test {
                                      eps,
                                      stream);
     EXPECT_TRUE(raft::devArrMatch(
-      U_dev_exp.data(), U.data(), n_cols * n_cols, raft::CompareApprox<math_t>(1e-4)));
+      U_dev_exp.data(), U.data(), n_cols * n_cols, raft::CompareApprox<math_t>(1e-4), stream));
   }
 
   void testCalcW0()
   {
+    auto stream    = handle.get_stream();
     n_active       = 4;
     const int ld_U = n_cols;
     rmm::device_uvector<math_t> ws(n_active, stream);
@@ -219,24 +225,26 @@ class LarsTest : public ::testing::Test {
 
     ML::Solver::Lars::calcW0(
       handle, n_active, n_cols, sign.data(), U.data(), ld_U, ws.data(), stream);
-    EXPECT_TRUE(
-      raft::devArrMatchHost(ws0_exp, ws.data(), n_active, raft::CompareApprox<math_t>(1e-3)));
+    EXPECT_TRUE(raft::devArrMatchHost(
+      ws0_exp, ws.data(), n_active, raft::CompareApprox<math_t>(1e-3), stream));
   }
 
   void testCalcA()
   {
+    auto stream = handle.get_stream();
     n_active    = 4;
     rmm::device_uvector<math_t> ws(n_active, stream);
     raft::update_device(ws.data(), ws0_exp, n_active, stream);
     ML::Solver::Lars::calcA(handle, A.data(), n_active, sign.data(), ws.data(), stream);
     EXPECT_TRUE(raft::devArrMatch(
-      (math_t)0.20070615686577709, A.data(), 1, raft::CompareApprox<math_t>(1e-6)));
+      (math_t)0.20070615686577709, A.data(), 1, raft::CompareApprox<math_t>(1e-6), stream));
   }
 
   void testEquiangular()
   {
+    auto stream = handle.get_stream();
     n_active    = 4;
     rmm::device_uvector<math_t> workspace(0, stream);
     rmm::device_uvector<math_t> u_eq(n_rows, stream);
     rmm::device_uvector<math_t> U(n_cols * n_cols, stream);
@@ -263,11 +271,11 @@ class LarsTest : public ::testing::Test {
                                        (math_t)-1,
                                        stream);
 
-    EXPECT_TRUE(
-      raft::devArrMatchHost(ws_exp, ws.data(), n_active, raft::CompareApprox<math_t>(1e-3)));
+    EXPECT_TRUE(raft::devArrMatchHost(
+      ws_exp, ws.data(), n_active, raft::CompareApprox<math_t>(1e-3), stream));
 
     EXPECT_TRUE(raft::devArrMatch(
-      (math_t)0.20070615686577709, A.data(), 1, raft::CompareApprox<math_t>(1e-4)));
+      (math_t)0.20070615686577709, A.data(), 1, raft::CompareApprox<math_t>(1e-4), stream));
 
     // Now test without Gram matrix, u should be calculated in this case
     initGU(G.data(), G.data(), U.data(), n_active, false);
@@ -289,11 +297,13 @@ class LarsTest : public ::testing::Test {
                                        (math_t)-1,
                                        stream);
 
-    EXPECT_TRUE(raft::devArrMatchHost(u_eq_exp, u_eq.data(), 1, raft::CompareApprox<math_t>(1e-3)));
+    EXPECT_TRUE(
+      raft::devArrMatchHost(u_eq_exp, u_eq.data(), 1, raft::CompareApprox<math_t>(1e-3), stream));
   }
 
   void testCalcMaxStep()
   {
+    auto stream       = handle.get_stream();
     n_active          = 2;
     math_t A_host     = 3.6534305290498055;
     math_t ws_host[2] = {0.25662594, -0.01708941};
@@ -331,10 +341,11 @@ class LarsTest : public ::testing::Test {
                                    a_vec.data(),
                                    stream);
     math_t gamma_exp = 0.20095407186830386;
-    EXPECT_TRUE(raft::devArrMatch(gamma_exp, gamma.data(), 1, raft::CompareApprox<math_t>(1e-6)));
+    EXPECT_TRUE(
+      raft::devArrMatch(gamma_exp, gamma.data(), 1, raft::CompareApprox<math_t>(1e-6), stream));
     math_t a_vec_exp[2] = {24.69447886, -139.66289908};
     EXPECT_TRUE(raft::devArrMatchHost(
-      a_vec_exp, a_vec.data(), a_vec.size(), raft::CompareApprox<math_t>(1e-4)));
+      a_vec_exp, a_vec.data(), a_vec.size(), raft::CompareApprox<math_t>(1e-4), stream));
 
     // test without G matrix, we use U as input in this case
     CUDA_CHECK(cudaMemsetAsync(gamma.data(), 0, sizeof(math_t), stream));
@@ -356,9 +367,10 @@ class LarsTest : public ::testing::Test {
                                    gamma.data(),
                                    a_vec.data(),
                                    stream);
-    EXPECT_TRUE(raft::devArrMatch(gamma_exp, gamma.data(), 1, raft::CompareApprox<math_t>(1e-6)));
+    EXPECT_TRUE(
+      raft::devArrMatch(gamma_exp, gamma.data(), 1, raft::CompareApprox<math_t>(1e-6), stream));
     EXPECT_TRUE(raft::devArrMatchHost(
-      a_vec_exp, a_vec.data(), a_vec.size(), raft::CompareApprox<math_t>(1e-4)));
+      a_vec_exp, a_vec.data(), a_vec.size(), raft::CompareApprox<math_t>(1e-4), stream));
 
     // Last iteration
     n_active = max_iter;
@@ -381,11 +393,11 @@ class LarsTest : public ::testing::Test {
                                    a_vec.data(),
                                    stream);
     gamma_exp = 11.496044516528272;
-    EXPECT_TRUE(raft::devArrMatch(gamma_exp, gamma.data(), 1, raft::CompareApprox<math_t>(1e-6)));
+    EXPECT_TRUE(
+      raft::devArrMatch(gamma_exp, gamma.data(), 1, raft::CompareApprox<math_t>(1e-6), stream));
   }
 
   raft::handle_t handle;
-  cudaStream_t stream = 0;
 
   const int n_rows = 4;
   const int n_cols = 4;
@@ -452,17 +464,15 @@ class LarsTestFitPredict : public ::testing::Test {
       alphas(n_cols + 1, handle.get_stream()),
       active_idx(n_cols, handle.get_stream())
   {
-    CUDA_CHECK(cudaStreamCreate(&stream));
-    handle.set_stream(stream);
+    auto stream = handle.get_stream();
     raft::update_device(X.data(), X_host, n_cols * n_rows, stream);
     raft::update_device(y.data(), y_host, n_rows, stream);
     raft::update_device(G.data(), G_host, n_cols * n_cols, stream);
   }
 
-  void TearDown() override { CUDA_CHECK(cudaStreamDestroy(stream)); }
-
  void testFitGram()
  {
+    auto stream   = handle.get_stream();
    int max_iter  = 10;
    int verbosity = 0;
    int n_active;
@@ -483,16 +493,17 @@ class LarsTestFitPredict : public ::testing::Test {
                              n_cols,
                              (math_t)-1);
     EXPECT_EQ(n_cols, n_active);
-    EXPECT_TRUE(
-      raft::devArrMatchHost(beta_exp, beta.data(), n_cols, raft::CompareApprox<math_t>(1e-5)));
     EXPECT_TRUE(raft::devArrMatchHost(
-      alphas_exp, alphas.data(), n_cols + 1, raft::CompareApprox<math_t>(1e-4)));
+      beta_exp, beta.data(), n_cols, raft::CompareApprox<math_t>(1e-5), stream));
+    EXPECT_TRUE(raft::devArrMatchHost(
+      alphas_exp, alphas.data(), n_cols + 1, raft::CompareApprox<math_t>(1e-4), stream));
     EXPECT_TRUE(
-      raft::devArrMatchHost(indices_exp, active_idx.data(), n_cols, raft::Compare<int>()));
+      raft::devArrMatchHost(indices_exp, active_idx.data(), n_cols, raft::Compare<int>(), stream));
  }
 
  void testFitX()
  {
+    auto stream   = handle.get_stream();
    int max_iter  = 10;
    int verbosity = 0;
    int n_active;
@@ -513,16 +524,17 @@ class LarsTestFitPredict : public ::testing::Test {
                              n_cols,
                              (math_t)-1);
     EXPECT_EQ(n_cols, n_active);
-    EXPECT_TRUE(
-      raft::devArrMatchHost(beta_exp, beta.data(), n_cols, raft::CompareApprox<math_t>(2e-4)));
     EXPECT_TRUE(raft::devArrMatchHost(
-      alphas_exp, alphas.data(), n_cols + 1, raft::CompareApprox<math_t>(1e-4)));
+      beta_exp, beta.data(), n_cols, raft::CompareApprox<math_t>(2e-4), stream));
+    EXPECT_TRUE(raft::devArrMatchHost(
+      alphas_exp, alphas.data(), n_cols + 1, raft::CompareApprox<math_t>(1e-4), stream));
     EXPECT_TRUE(
-      raft::devArrMatchHost(indices_exp, active_idx.data(), n_cols, raft::Compare<int>()));
+      raft::devArrMatchHost(indices_exp, active_idx.data(), n_cols, raft::Compare<int>(), stream));
  }
 
  void testPredictV1()
  {
+    auto stream  = handle.get_stream();
    int ld_X     = n_rows;
    int n_active = n_cols;
    raft::update_device(beta.data(), beta_exp, n_active, stream);
@@ -540,11 +552,12 @@ class LarsTestFitPredict : public ::testing::Test {
                               intercept,
                               y.data());
     EXPECT_TRUE(
-      raft::devArrMatchHost(pred_exp, y.data(), n_rows, raft::CompareApprox<math_t>(1e-5)));
+      raft::devArrMatchHost(pred_exp, y.data(), n_rows, raft::CompareApprox<math_t>(1e-5), stream));
  }
 
  void testPredictV2()
  {
+    auto stream  = handle.get_stream();
    int ld_X     = n_rows;
    int n_active = n_cols;
 
@@ -566,11 +579,12 @@ class LarsTestFitPredict : public ::testing::Test {
                               intercept,
                               y.data());
     EXPECT_TRUE(
-      raft::devArrMatchHost(pred_exp, y.data(), n_rows, raft::CompareApprox<math_t>(1e-5)));
+      raft::devArrMatchHost(pred_exp, y.data(), n_rows, raft::CompareApprox<math_t>(1e-5), stream));
  }
 
  void testFitLarge()
  {
+    auto stream  = handle.get_stream();
    int n_rows   = 65536;
    int n_cols   = 10;
    int max_iter = n_cols;
@@ -606,7 +620,6 @@ class LarsTestFitPredict : public ::testing::Test {
  }
 
   raft::handle_t handle;
-  cudaStream_t stream = 0;
 
   const int n_rows = 10;
   const int n_cols = 5;
diff --git a/cpp/test/sg/ols.cu b/cpp/test/sg/ols.cu
index a0965b1827..31744f1af0 100644
--- a/cpp/test/sg/ols.cu
+++ b/cpp/test/sg/ols.cu
@@ -41,9 +41,10 @@ class OlsTest : public ::testing::TestWithParam<OlsInputs<T>> {
 protected:
  void basicTest()
  {
-    params   = ::testing::TestWithParam<OlsInputs<T>>::GetParam();
-    int len  = params.n_row * params.n_col;
-    int len2 = params.n_row_2 * params.n_col;
+    auto stream = handle.get_stream();
+    params      = ::testing::TestWithParam<OlsInputs<T>>::GetParam();
+    int len     = params.n_row * params.n_col;
+    int len2    = params.n_row_2 * params.n_col;
 
     raft::allocate(data, len, stream);
     raft::allocate(labels, params.n_row, stream);
@@ -152,8 +153,9 @@ class OlsTest : public ::testing::TestWithParam<OlsInputs<T>> {
 
  void basicTest2()
  {
-    params  = ::testing::TestWithParam<OlsInputs<T>>::GetParam();
-    int len = params.n_row * params.n_col;
+    auto stream = handle.get_stream();
+    params      = ::testing::TestWithParam<OlsInputs<T>>::GetParam();
+    int len     = params.n_row * params.n_col;
 
     raft::allocate(data_sc, len, stream);
     raft::allocate(labels_sc, len, stream);
@@ -180,8 +182,6 @@ class OlsTest : public ::testing::TestWithParam<OlsInputs<T>> {
 
  void SetUp() override
  {
-    CUDA_CHECK(cudaStreamCreate(&stream));
-    handle.set_stream(stream);
    basicTest();
    basicTest2();
  }
@@ -208,7 +208,6 @@ class OlsTest : public ::testing::TestWithParam<OlsInputs<T>> {
     CUDA_CHECK(cudaFree(labels_sc));
     CUDA_CHECK(cudaFree(coef_sc));
     CUDA_CHECK(cudaFree(coef_sc_ref));
-    CUDA_CHECK(cudaStreamDestroy(stream));
  }
 
 protected:
@@ -219,7 +218,6 @@ class OlsTest : public ::testing::TestWithParam<OlsInputs<T>> {
   T *data_sc, *labels_sc, *coef_sc, *coef_sc_ref;
   T intercept, intercept2, intercept3;
   raft::handle_t handle;
-  cudaStream_t stream = 0;
 };
 
 const std::vector<OlsInputs<float>> inputsf2 = {
 
 const std::vector<OlsInputs<double>> inputsd2 = {
 
 typedef OlsTest<float> OlsTestF;
 TEST_P(OlsTestF, Fit)
 {
-  ASSERT_TRUE(devArrMatch(coef_ref, coef, params.n_col, raft::CompareApproxAbs<float>(params.tol)));
-
+  auto stream = handle.get_stream();
   ASSERT_TRUE(
-    devArrMatch(coef2_ref, coef2, params.n_col, raft::CompareApproxAbs<float>(params.tol)));
+    devArrMatch(coef_ref, coef, params.n_col, raft::CompareApproxAbs<float>(params.tol), stream));
 
   ASSERT_TRUE(
-    devArrMatch(coef3_ref, coef3, params.n_col, raft::CompareApproxAbs<float>(params.tol)));
+    devArrMatch(coef2_ref, coef2, params.n_col, raft::CompareApproxAbs<float>(params.tol), stream));
 
   ASSERT_TRUE(
-    devArrMatch(pred_ref, pred, params.n_row_2, raft::CompareApproxAbs<float>(params.tol)));
+    devArrMatch(coef3_ref, coef3, params.n_col, raft::CompareApproxAbs<float>(params.tol), stream));
 
   ASSERT_TRUE(
-    devArrMatch(pred2_ref, pred2, params.n_row_2, raft::CompareApproxAbs<float>(params.tol)));
+    devArrMatch(pred_ref, pred, params.n_row_2, raft::CompareApproxAbs<float>(params.tol), stream));
 
-  ASSERT_TRUE(
-    devArrMatch(pred3_ref, pred3, params.n_row_2, raft::CompareApproxAbs<float>(params.tol)));
+  ASSERT_TRUE(devArrMatch(
+    pred2_ref, pred2, params.n_row_2, raft::CompareApproxAbs<float>(params.tol), stream));
 
-  ASSERT_TRUE(devArrMatch(coef_sc_ref, coef_sc, 1, raft::CompareApproxAbs<float>(params.tol)));
+  ASSERT_TRUE(devArrMatch(
+    pred3_ref, pred3, params.n_row_2, raft::CompareApproxAbs<float>(params.tol), stream));
+
+  ASSERT_TRUE(
+    devArrMatch(coef_sc_ref, coef_sc, 1, raft::CompareApproxAbs<float>(params.tol), stream));
 }
 
 typedef OlsTest<double> OlsTestD;
 TEST_P(OlsTestD, Fit)
 {
-  ASSERT_TRUE(
-    raft::devArrMatch(coef_ref, coef, params.n_col, raft::CompareApproxAbs<double>(params.tol)));
+  auto stream = handle.get_stream();
+  ASSERT_TRUE(raft::devArrMatch(
+    coef_ref, coef, params.n_col, raft::CompareApproxAbs<double>(params.tol), stream));
 
-  ASSERT_TRUE(
-    raft::devArrMatch(coef2_ref, coef2, params.n_col, raft::CompareApproxAbs<double>(params.tol)));
+  ASSERT_TRUE(raft::devArrMatch(
+    coef2_ref, coef2, params.n_col, raft::CompareApproxAbs<double>(params.tol), stream));
 
-  ASSERT_TRUE(
-    raft::devArrMatch(coef3_ref, coef3, params.n_col, raft::CompareApproxAbs<double>(params.tol)));
+  ASSERT_TRUE(raft::devArrMatch(
+    coef3_ref, coef3, params.n_col, raft::CompareApproxAbs<double>(params.tol), stream));
 
-  ASSERT_TRUE(
-    raft::devArrMatch(pred_ref, pred, params.n_row_2, raft::CompareApproxAbs<double>(params.tol)));
+  ASSERT_TRUE(raft::devArrMatch(
+    pred_ref, pred, params.n_row_2, raft::CompareApproxAbs<double>(params.tol), stream));
 
-  ASSERT_TRUE(
-    devArrMatch(pred2_ref, pred2, params.n_row_2, raft::CompareApproxAbs<double>(params.tol)));
+  ASSERT_TRUE(devArrMatch(
+    pred2_ref, pred2, params.n_row_2, raft::CompareApproxAbs<double>(params.tol), stream));
 
   ASSERT_TRUE(raft::devArrMatch(
-    pred3_ref, pred3, params.n_row_2, raft::CompareApproxAbs<double>(params.tol)));
+    pred3_ref, pred3, params.n_row_2, raft::CompareApproxAbs<double>(params.tol), stream));
 
-  ASSERT_TRUE(devArrMatch(coef_sc_ref, coef_sc, 1, raft::CompareApproxAbs<double>(params.tol)));
+  ASSERT_TRUE(
+    devArrMatch(coef_sc_ref, coef_sc, 1, raft::CompareApproxAbs<double>(params.tol), stream));
 }
 
 INSTANTIATE_TEST_CASE_P(OlsTests, OlsTestF, ::testing::ValuesIn(inputsf2));
diff --git a/cpp/test/sg/pca_test.cu b/cpp/test/sg/pca_test.cu
index 19f4ebcc83..d869c99257 100644
--- a/cpp/test/sg/pca_test.cu
+++ b/cpp/test/sg/pca_test.cu
@@ -52,7 +52,8 @@ class PcaTest : public ::testing::TestWithParam<PcaInputs<T>> {
 protected:
  void basicTest()
  {
-    params = ::testing::TestWithParam<PcaInputs<T>>::GetParam();
+    auto stream = handle.get_stream();
+    params      = ::testing::TestWithParam<PcaInputs<T>>::GetParam();
    raft::random::Rng r(params.seed, raft::random::GenTaps);
    int len = params.len;
@@ -115,7 +116,8 @@ class PcaTest : public ::testing::TestWithParam<PcaInputs<T>> {
 
  void advancedTest()
  {
-    params = ::testing::TestWithParam<PcaInputs<T>>::GetParam();
+    auto stream = handle.get_stream();
+    params      = ::testing::TestWithParam<PcaInputs<T>>::GetParam();
    raft::random::Rng r(params.seed, raft::random::GenTaps);
    int len = params.len2;
@@ -160,8 +162,6 @@ class PcaTest : public ::testing::TestWithParam<PcaInputs<T>> {
 
  void SetUp() override
  {
-    CUDA_CHECK(cudaStreamCreate(&stream));
-    handle.set_stream(stream);
    basicTest();
    advancedTest();
  }
@@ -189,7 +189,6 @@ class PcaTest : public ::testing::TestWithParam<PcaInputs<T>> {
     CUDA_CHECK(cudaFree(singular_vals2));
     CUDA_CHECK(cudaFree(mean2));
     CUDA_CHECK(cudaFree(noise_vars2));
-    CUDA_CHECK(cudaStreamDestroy(stream));
  }
 
 protected:
@@ -200,7 +199,6 @@ class PcaTest : public ::testing::TestWithParam<PcaInputs<T>> {
   T *data2, *data2_trans, *data2_back, *components2, *explained_vars2, *explained_var_ratio2,
     *singular_vals2, *mean2, *noise_vars2;
   raft::handle_t handle;
-  cudaStream_t stream = 0;
 };
 
 const std::vector<PcaInputs<float>> inputsf2 = {
@@ -217,7 +215,8 @@ TEST_P(PcaTestValF, Result)
   ASSERT_TRUE(devArrMatch(explained_vars,
                           explained_vars_ref,
                           params.n_col,
-                          raft::CompareApproxAbs<float>(params.tolerance)));
+                          raft::CompareApproxAbs<float>(params.tolerance),
+                          handle.get_stream()));
 }
 
 typedef PcaTest<double> PcaTestValD;
@@ -226,7 +225,8 @@ TEST_P(PcaTestValD, Result)
   ASSERT_TRUE(devArrMatch(explained_vars,
                           explained_vars_ref,
                           params.n_col,
-                          raft::CompareApproxAbs<double>(params.tolerance)));
+                          raft::CompareApproxAbs<double>(params.tolerance),
+                          handle.get_stream()));
 }
 
 typedef PcaTest<float> PcaTestLeftVecF;
@@ -235,7 +235,8 @@ TEST_P(PcaTestLeftVecF, Result)
   ASSERT_TRUE(devArrMatch(components,
                           components_ref,
                           (params.n_col * params.n_col),
-                          raft::CompareApproxAbs<float>(params.tolerance)));
+                          raft::CompareApproxAbs<float>(params.tolerance),
+                          handle.get_stream()));
 }
 
 typedef PcaTest<double> PcaTestLeftVecD;
@@ -244,7 +245,8 @@ TEST_P(PcaTestLeftVecD, Result)
   ASSERT_TRUE(devArrMatch(components,
                           components_ref,
                           (params.n_col * params.n_col),
-                          raft::CompareApproxAbs<double>(params.tolerance)));
+                          raft::CompareApproxAbs<double>(params.tolerance),
+                          handle.get_stream()));
 }
 
 typedef PcaTest<float> PcaTestTransDataF;
@@ -253,7 +255,8 @@ TEST_P(PcaTestTransDataF, Result)
   ASSERT_TRUE(devArrMatch(trans_data,
                           trans_data_ref,
                           (params.n_row * params.n_col),
-                          raft::CompareApproxAbs<float>(params.tolerance)));
+                          raft::CompareApproxAbs<float>(params.tolerance),
+                          handle.get_stream()));
 }
 
 typedef PcaTest<double> PcaTestTransDataD;
@@ -262,7 +265,8 @@ TEST_P(PcaTestTransDataD, Result)
   ASSERT_TRUE(devArrMatch(trans_data,
                           trans_data_ref,
                           (params.n_row * params.n_col),
-                          raft::CompareApproxAbs<double>(params.tolerance)));
+                          raft::CompareApproxAbs<double>(params.tolerance),
+                          handle.get_stream()));
 }
 
 typedef PcaTest<float> PcaTestDataVecSmallF;
@@ -271,7 +275,8 @@ TEST_P(PcaTestDataVecSmallF, Result)
   ASSERT_TRUE(devArrMatch(data,
                           data_back,
                           (params.n_col * params.n_col),
-                          raft::CompareApproxAbs<float>(params.tolerance)));
+                          raft::CompareApproxAbs<float>(params.tolerance),
+                          handle.get_stream()));
 }
 
 typedef PcaTest<double> PcaTestDataVecSmallD;
@@ -280,7 +285,8 @@ TEST_P(PcaTestDataVecSmallD, Result)
   ASSERT_TRUE(devArrMatch(data,
                           data_back,
                           (params.n_col * params.n_col),
-                          raft::CompareApproxAbs<double>(params.tolerance)));
+                          raft::CompareApproxAbs<double>(params.tolerance),
+                          handle.get_stream()));
 }
 
 // FIXME: These tests are disabled due to driver 418+ making them fail:
@@ -291,7 +297,8 @@ TEST_P(PcaTestDataVecF, Result)
   ASSERT_TRUE(devArrMatch(data2,
                           data2_back,
                           (params.n_col2 * params.n_col2),
-                          raft::CompareApproxAbs<float>(params.tolerance)));
+                          raft::CompareApproxAbs<float>(params.tolerance),
+                          handle.get_stream()));
 }
 
 typedef PcaTest<double> PcaTestDataVecD;
@@ -300,7 +307,8 @@ TEST_P(PcaTestDataVecD, Result)
   ASSERT_TRUE(raft::devArrMatch(data2,
                                 data2_back,
                                 (params.n_col2 * params.n_col2),
-                                raft::CompareApproxAbs<double>(params.tolerance)));
+                                raft::CompareApproxAbs<double>(params.tolerance),
+                                handle.get_stream()));
 }
 
 INSTANTIATE_TEST_CASE_P(PcaTests, PcaTestValF, ::testing::ValuesIn(inputsf2));
diff --git a/cpp/test/sg/rf_test.cu b/cpp/test/sg/rf_test.cu
index 59aa2c29d3..86c08573a1 100644
--- a/cpp/test/sg/rf_test.cu
+++ b/cpp/test/sg/rf_test.cu
@@ -235,7 +235,8 @@ class RfSpecialisedTest {
 public:
   RfSpecialisedTest(RfTestParams params) : params(params)
   {
-    raft::handle_t handle(params.n_streams);
+    auto stream_pool = std::make_shared<rmm::cuda_stream_pool>(params.n_streams);
+    raft::handle_t handle(rmm::cuda_stream_per_thread, stream_pool);
     X.resize(params.n_rows * params.n_cols);
     X_transpose.resize(params.n_rows * params.n_cols);
     y.resize(params.n_rows);
@@ -294,7 +295,8 @@ class RfSpecialisedTest {
     if (params.n_trees > 1) { return; }
     // accuracy is not guaranteed to improve with bootstrapping
     if (params.bootstrap) { return; }
-    raft::handle_t handle(params.n_streams);
+    auto stream_pool = std::make_shared<rmm::cuda_stream_pool>(params.n_streams);
+    raft::handle_t handle(rmm::cuda_stream_per_thread, stream_pool);
     RfTestParams alt_params = params;
     alt_params.max_depth--;
     auto [alt_forest, alt_predictions, alt_metrics] =
@@ -349,7 +351,8 @@ class RfSpecialisedTest {
     if (is_regression) return;
     // Repeat training
-    raft::handle_t handle(params.n_streams);
+    auto stream_pool = std::make_shared<rmm::cuda_stream_pool>(params.n_streams);
+    raft::handle_t handle(rmm::cuda_stream_per_thread, stream_pool);
     auto [alt_forest, alt_predictions, alt_metrics] =
       TrainScore(handle, params, X.data().get(), X_transpose.data().get(), y.data().get());
 
@@ -399,7 +402,8 @@ class RfSpecialisedTest {
     if constexpr (std::is_same_v<DataT, double>) {
       return;
     } else {
-      raft::handle_t handle(params.n_streams);
+      auto stream_pool = std::make_shared<rmm::cuda_stream_pool>(params.n_streams);
+      raft::handle_t handle(rmm::cuda_stream_per_thread, stream_pool);
       auto fil_pred = FilPredict(handle, params, X_transpose.data().get(), forest.get());
 
       thrust::host_vector<float> h_fil_pred(*fil_pred);
@@ -638,7 +642,8 @@ TEST(RfTest, TextDump)
   std::vector<int> y_host = {0, 0, 1, 1, 1, 0};
   thrust::device_vector<int> y = y_host;
 
-  raft::handle_t handle(1);
+  auto stream_pool = std::make_shared<rmm::cuda_stream_pool>(1);
+  raft::handle_t handle(rmm::cuda_stream_per_thread, stream_pool);
   auto forest_ptr = forest.get();
   fit(handle, forest_ptr, X.data().get(), y.size(), 1, y.data().get(), 2, rf_params);
diff --git a/cpp/test/sg/ridge.cu b/cpp/test/sg/ridge.cu
index 77f6a8fe03..c8608e919c 100644
--- a/cpp/test/sg/ridge.cu
+++ b/cpp/test/sg/ridge.cu
@@ -41,9 +41,10 @@ class RidgeTest : public ::testing::TestWithParam<RidgeInputs<T>> {
 protected:
  void basicTest()
  {
-    params   = ::testing::TestWithParam<RidgeInputs<T>>::GetParam();
-    int len  = params.n_row * params.n_col;
-    int len2 = params.n_row_2 * params.n_col;
+    auto stream = handle.get_stream();
+    params      = ::testing::TestWithParam<RidgeInputs<T>>::GetParam();
+    int len     = params.n_row * params.n_col;
+    int len2    = params.n_row_2 * params.n_col;
 
     raft::allocate(data, len, stream);
     raft::allocate(labels, params.n_row, stream);
@@ -150,8 +151,9 @@ class RidgeTest : public ::testing::TestWithParam<RidgeInputs<T>> {
 
  void basicTest2()
  {
-    params  = ::testing::TestWithParam<RidgeInputs<T>>::GetParam();
-    int len = params.n_row * params.n_col;
+    auto stream = handle.get_stream();
+    params      = ::testing::TestWithParam<RidgeInputs<T>>::GetParam();
+    int len     = params.n_row * params.n_col;
 
     raft::allocate(data_sc, len, stream);
     raft::allocate(labels_sc, len, stream);
@@ -190,8 +192,6 @@ class RidgeTest : public ::testing::TestWithParam<RidgeInputs<T>> {
 
  void SetUp() override
  {
-    CUDA_CHECK(cudaStreamCreate(&stream));
-    handle.set_stream(stream);
    basicTest();
    basicTest2();
  }
@@ -218,7 +218,6 @@ class RidgeTest : public ::testing::TestWithParam<RidgeInputs<T>> {
     CUDA_CHECK(cudaFree(labels_sc));
     CUDA_CHECK(cudaFree(coef_sc));
     CUDA_CHECK(cudaFree(coef_sc_ref));
-    CUDA_CHECK(cudaStreamDestroy(stream));
  }
 
 protected:
@@ -229,7 +228,6 @@ class RidgeTest : public ::testing::TestWithParam<RidgeInputs<T>> {
   T *data_sc, *labels_sc, *coef_sc, *coef_sc_ref;
   T intercept, intercept2, intercept3;
   raft::handle_t handle;
-  cudaStream_t stream = 0;
 };
 
 const std::vector<RidgeInputs<float>> inputsf2 = {{0.001f, 3, 2, 2, 0, 0.5f},
 
 const std::vector<RidgeInputs<double>> inputsd2 = {{0.001, 3, 2, 2, 0, 0.5},
 
 typedef RidgeTest<float> RidgeTestF;
 TEST_P(RidgeTestF, Fit)
 {
-  ASSERT_TRUE(
-    raft::devArrMatch(coef_ref, coef, params.n_col, raft::CompareApproxAbs<float>(params.tol)));
+  auto stream = handle.get_stream();
+  ASSERT_TRUE(raft::devArrMatch(
+    coef_ref, coef, params.n_col, raft::CompareApproxAbs<float>(params.tol), stream));
 
-  ASSERT_TRUE(
-    raft::devArrMatch(coef2_ref, coef2, params.n_col, raft::CompareApproxAbs<float>(params.tol)));
+  ASSERT_TRUE(raft::devArrMatch(
+    coef2_ref, coef2, params.n_col, raft::CompareApproxAbs<float>(params.tol), stream));
 
-  ASSERT_TRUE(
-    raft::devArrMatch(coef3_ref, coef3, params.n_col, raft::CompareApproxAbs<float>(params.tol)));
+  ASSERT_TRUE(raft::devArrMatch(
+    coef3_ref, coef3, params.n_col, raft::CompareApproxAbs<float>(params.tol), stream));
 
-  ASSERT_TRUE(
-    raft::devArrMatch(pred_ref, pred, params.n_row_2, raft::CompareApproxAbs<float>(params.tol)));
+  ASSERT_TRUE(raft::devArrMatch(
+    pred_ref, pred, params.n_row_2, raft::CompareApproxAbs<float>(params.tol), stream));
 
-  ASSERT_TRUE(
-    raft::devArrMatch(pred2_ref, pred2, params.n_row_2, raft::CompareApproxAbs<float>(params.tol)));
+  ASSERT_TRUE(raft::devArrMatch(
+    pred2_ref, pred2, params.n_row_2, raft::CompareApproxAbs<float>(params.tol), stream));
 
-  ASSERT_TRUE(
-    raft::devArrMatch(pred3_ref, pred3, params.n_row_2, raft::CompareApproxAbs<float>(params.tol)));
+  ASSERT_TRUE(raft::devArrMatch(
+    pred3_ref, pred3, params.n_row_2, raft::CompareApproxAbs<float>(params.tol), stream));
 
   ASSERT_TRUE(
-    raft::devArrMatch(coef_sc_ref, coef_sc, 1, raft::CompareApproxAbs<float>(params.tol)));
+    raft::devArrMatch(coef_sc_ref, coef_sc, 1, raft::CompareApproxAbs<float>(params.tol), stream));
 }
 
 typedef RidgeTest<double> RidgeTestD;
 TEST_P(RidgeTestD, Fit)
 {
-  ASSERT_TRUE(
-    raft::devArrMatch(coef_ref, coef, params.n_col, raft::CompareApproxAbs<double>(params.tol)));
+  auto stream = handle.get_stream();
+  ASSERT_TRUE(raft::devArrMatch(
+    coef_ref, coef, params.n_col, raft::CompareApproxAbs<double>(params.tol), stream));
 
-  ASSERT_TRUE(
-    raft::devArrMatch(coef2_ref, coef2, params.n_col, raft::CompareApproxAbs<double>(params.tol)));
+  ASSERT_TRUE(raft::devArrMatch(
+    coef2_ref, coef2, params.n_col, raft::CompareApproxAbs<double>(params.tol), stream));
 
-  ASSERT_TRUE(
-    raft::devArrMatch(coef3_ref, coef3, params.n_col, raft::CompareApproxAbs<double>(params.tol)));
+  ASSERT_TRUE(raft::devArrMatch(
+    coef3_ref, coef3, params.n_col, raft::CompareApproxAbs<double>(params.tol), stream));
 
-  ASSERT_TRUE(
-    raft::devArrMatch(pred_ref, pred, params.n_row_2, raft::CompareApproxAbs<double>(params.tol)));
+  ASSERT_TRUE(raft::devArrMatch(
+    pred_ref, pred, params.n_row_2, raft::CompareApproxAbs<double>(params.tol), stream));
 
   ASSERT_TRUE(raft::devArrMatch(
-    pred2_ref, pred2, params.n_row_2, raft::CompareApproxAbs<double>(params.tol)));
+    pred2_ref, pred2, params.n_row_2, raft::CompareApproxAbs<double>(params.tol), stream));
 
   ASSERT_TRUE(raft::devArrMatch(
-    pred3_ref, pred3, params.n_row_2, raft::CompareApproxAbs<double>(params.tol)));
+    pred3_ref, pred3, params.n_row_2, raft::CompareApproxAbs<double>(params.tol), stream));
 
   ASSERT_TRUE(
-    raft::devArrMatch(coef_sc_ref, coef_sc, 1, raft::CompareApproxAbs<double>(params.tol)));
+    raft::devArrMatch(coef_sc_ref, coef_sc, 1, raft::CompareApproxAbs<double>(params.tol), stream));
 }
 
 INSTANTIATE_TEST_CASE_P(RidgeTests, RidgeTestF, ::testing::ValuesIn(inputsf2));
diff --git a/cpp/test/sg/sgd.cu b/cpp/test/sg/sgd.cu
index 42b15625c3..2ef61688b5 100644
--- a/cpp/test/sg/sgd.cu
+++ b/cpp/test/sg/sgd.cu
@@ -41,8 +41,9 @@ class SgdTest : public ::testing::TestWithParam<SgdInputs<T>> {
 protected:
  void linearRegressionTest()
  {
-    params  = ::testing::TestWithParam<SgdInputs<T>>::GetParam();
-    int len = params.n_row * params.n_col;
+    auto stream = handle.get_stream();
+    params      = ::testing::TestWithParam<SgdInputs<T>>::GetParam();
+    int len     = params.n_row * params.n_col;
 
     raft::allocate(data, len, stream);
     raft::allocate(labels, params.n_row, stream);
@@ -126,8 +127,9 @@ class SgdTest : public ::testing::TestWithParam<SgdInputs<T>> {
 
  void logisticRegressionTest()
  {
-    params  = ::testing::TestWithParam<SgdInputs<T>>::GetParam();
-    int len = params.n_row2 * params.n_col2;
+    auto stream = handle.get_stream();
+    params      = ::testing::TestWithParam<SgdInputs<T>>::GetParam();
+    int len     = params.n_row2 * params.n_col2;
     T* coef_class;
 
     raft::allocate(data_logreg, len, stream);
@@ -200,8 +202,9 @@ class SgdTest : public ::testing::TestWithParam<SgdInputs<T>> {
 
  void svmTest()
  {
-    params  = ::testing::TestWithParam<SgdInputs<T>>::GetParam();
-    int len = params.n_row2 * params.n_col2;
+    auto stream = handle.get_stream();
+    params      = ::testing::TestWithParam<SgdInputs<T>>::GetParam();
+    int len     = params.n_row2 * params.n_col2;
     T* coef_class;
 
     raft::allocate(data_svmreg, len, stream);
@@ -274,8 +277,6 @@ class SgdTest : public ::testing::TestWithParam<SgdInputs<T>> {
 
  void SetUp() override
  {
-    CUDA_CHECK(cudaStreamCreate(&stream));
-    handle.set_stream(stream);
    linearRegressionTest();
    logisticRegressionTest();
    svmTest();
@@ -299,7 +300,6 @@ class SgdTest : public ::testing::TestWithParam<SgdInputs<T>> {
     CUDA_CHECK(cudaFree(pred_svm_ref));
     CUDA_CHECK(cudaFree(pred_log));
     CUDA_CHECK(cudaFree(pred_log_ref));
-    CUDA_CHECK(cudaStreamDestroy(stream));
  }
 
 protected:
@@ -310,7 +310,6 @@ class SgdTest : public ::testing::TestWithParam<SgdInputs<T>> {
   T *data_svmreg, *data_svmreg_test, *labels_svmreg;
   T *pred_svm, *pred_svm_ref, *pred_log, *pred_log_ref;
   T intercept, intercept2;
-  cudaStream_t stream = 0;
   raft::handle_t handle;
 };
 
 const std::vector<SgdInputs<double>> inputsd2 = {{0.01, 4, 2, 4, 3, 2}};
 
 typedef SgdTest<float> SgdTestF;
 TEST_P(SgdTestF, Fit)
 {
-  ASSERT_TRUE(
-    raft::devArrMatch(coef_ref, coef, params.n_col, raft::CompareApproxAbs<float>(params.tol)));
+  auto stream = handle.get_stream();
+  ASSERT_TRUE(raft::devArrMatch(
+    coef_ref, coef, params.n_col, raft::CompareApproxAbs<float>(params.tol), stream));
 
-  ASSERT_TRUE(
-    raft::devArrMatch(coef2_ref, coef2, params.n_col, raft::CompareApproxAbs<float>(params.tol)));
+  ASSERT_TRUE(raft::devArrMatch(
+    coef2_ref, coef2, params.n_col, raft::CompareApproxAbs<float>(params.tol), stream));
 
   ASSERT_TRUE(raft::devArrMatch(
-    pred_log_ref, pred_log, params.n_row, raft::CompareApproxAbs<float>(params.tol)));
+    pred_log_ref, pred_log, params.n_row, raft::CompareApproxAbs<float>(params.tol), stream));
 
   ASSERT_TRUE(raft::devArrMatch(
-    pred_svm_ref, pred_svm, params.n_row, raft::CompareApproxAbs<float>(params.tol)));
+    pred_svm_ref, pred_svm, params.n_row, raft::CompareApproxAbs<float>(params.tol), stream));
 }
 
 typedef SgdTest<double> SgdTestD;
 TEST_P(SgdTestD, Fit)
 {
-  ASSERT_TRUE(
-    raft::devArrMatch(coef_ref, coef, params.n_col, raft::CompareApproxAbs<double>(params.tol)));
+  auto stream = handle.get_stream();
+  ASSERT_TRUE(raft::devArrMatch(
+    coef_ref, coef, params.n_col, raft::CompareApproxAbs<double>(params.tol), stream));
 
-  ASSERT_TRUE(
-    raft::devArrMatch(coef2_ref, coef2, params.n_col, raft::CompareApproxAbs<double>(params.tol)));
+  ASSERT_TRUE(raft::devArrMatch(
+    coef2_ref, coef2, params.n_col, raft::CompareApproxAbs<double>(params.tol), stream));
 
   ASSERT_TRUE(raft::devArrMatch(
-    pred_log_ref, pred_log, params.n_row, raft::CompareApproxAbs<double>(params.tol)));
+    pred_log_ref, pred_log, params.n_row, raft::CompareApproxAbs<double>(params.tol), stream));
 
   ASSERT_TRUE(raft::devArrMatch(
-    pred_svm_ref, pred_svm, params.n_row, raft::CompareApproxAbs<double>(params.tol)));
+    pred_svm_ref, pred_svm, params.n_row, raft::CompareApproxAbs<double>(params.tol), stream));
 }
 
 INSTANTIATE_TEST_CASE_P(SgdTests, SgdTestF, ::testing::ValuesIn(inputsf2));
diff --git a/cpp/test/sg/svc_test.cu b/cpp/test/sg/svc_test.cu
index 5938e9645b..cfb5487d61 100644
--- a/cpp/test/sg/svc_test.cu
+++ b/cpp/test/sg/svc_test.cu
@@ -63,8 +63,7 @@ class WorkingSetTest : public ::testing::Test {
 protected:
  void SetUp() override
  {
-    CUDA_CHECK(cudaStreamCreate(&stream));
-    handle.set_stream(stream);
+    auto stream = this->handle.get_stream();
     raft::allocate(f_dev, 10, stream);
     raft::allocate(y_dev, 10, stream);
     raft::allocate(C_dev, 10, stream);
@@ -77,14 +76,12 @@ class WorkingSetTest : public ::testing::Test {
 
  void TearDown() override
  {
-    CUDA_CHECK(cudaStreamDestroy(stream));
     CUDA_CHECK(cudaFree(f_dev));
     CUDA_CHECK(cudaFree(y_dev));
     CUDA_CHECK(cudaFree(C_dev));
     CUDA_CHECK(cudaFree(alpha_dev));
  }
   raft::handle_t handle;
-  cudaStream_t stream = 0;
 
   WorkingSet<math_t>* ws;
   math_t f_host[10] = {1, 3, 10, 4, 2, 8, 6, 5, 9, 7};
@@ -109,30 +106,35 @@ TYPED_TEST_CASE(WorkingSetTest, FloatTypes);
 
 TYPED_TEST(WorkingSetTest, Init)
 {
-  this->ws = new WorkingSet<TypeParam>(this->handle, this->handle.get_stream(), 10);
+  auto stream = this->handle.get_stream();
+  this->ws    = new WorkingSet<TypeParam>(this->handle, this->handle.get_stream(), 10);
   EXPECT_EQ(this->ws->GetSize(), 10);
   delete this->ws;
 
-  this->ws = new WorkingSet<TypeParam>(this->handle, this->stream, 100000);
+  this->ws = new WorkingSet<TypeParam>(this->handle, stream, 100000);
   EXPECT_EQ(this->ws->GetSize(), 1024);
   delete this->ws;
 }
 
 TYPED_TEST(WorkingSetTest, Select)
 {
-  this->ws = new WorkingSet<TypeParam>(this->handle, this->stream, 10, 4);
+  auto stream = this->handle.get_stream();
+  this->ws    = new WorkingSet<TypeParam>(this->handle, stream, 10, 4);
   EXPECT_EQ(this->ws->GetSize(), 4);
   this->ws->SimpleSelect(this->f_dev, this->alpha_dev, this->y_dev, this->C_dev);
   ASSERT_TRUE(devArrMatchHost(
-    this->expected_idx, this->ws->GetIndices(), this->ws->GetSize(), raft::Compare<int>()));
+    this->expected_idx, this->ws->GetIndices(), this->ws->GetSize(), raft::Compare<int>(), stream));
   this->ws->Select(this->f_dev, this->alpha_dev, this->y_dev, this->C_dev);
   ASSERT_TRUE(devArrMatchHost(
-    this->expected_idx, this->ws->GetIndices(), this->ws->GetSize(), raft::Compare<int>()));
+    this->expected_idx, this->ws->GetIndices(), this->ws->GetSize(), raft::Compare<int>(), stream));
   this->ws->Select(this->f_dev, this->alpha_dev, this->y_dev, this->C_dev);
-  ASSERT_TRUE(devArrMatchHost(
-    this->expected_idx2, this->ws->GetIndices(), this->ws->GetSize(), raft::Compare<int>()));
+  ASSERT_TRUE(devArrMatchHost(this->expected_idx2,
+                              this->ws->GetIndices(),
+                              this->ws->GetSize(),
+                              raft::Compare<int>(),
+                              stream));
   delete this->ws;
 }
 
@@ -145,8 +147,7 @@ class KernelCacheTest : public ::testing::Test {
 protected:
  void SetUp() override
  {
-    CUDA_CHECK(cudaStreamCreate(&stream));
-    handle.set_stream(stream);
+    auto stream   = this->handle.get_stream();
     cublas_handle = handle.get_cublas_handle();
     raft::allocate(x_dev, n_rows * n_cols, stream);
     raft::update_device(x_dev, x_host, n_rows * n_cols, stream);
@@ -157,7 +158,6 @@ class KernelCacheTest : public ::testing::Test {
 
  void TearDown() override
  {
-    CUDA_CHECK(cudaStreamDestroy(stream));
     CUDA_CHECK(cudaFree(x_dev));
     CUDA_CHECK(cudaFree(ws_idx_dev));
  }
@@ -197,6 +197,7 @@ class KernelCacheTest : public ::testing::Test {
 
  void check(const math_t* tile_dev, int n_ws, int n_rows, const int* ws_idx, const int* kColIdx)
  {
+    auto stream = this->handle.get_stream();
     std::vector<int> ws_idx_h(n_ws);
     raft::update_host(ws_idx_h.data(), ws_idx, n_ws, stream);
     std::vector<int> kidx_h(n_ws);
@@ -210,13 +211,13 @@ class KernelCacheTest : public ::testing::Test {
       int kidx                 = kidx_h[i];
       const math_t* cache_row  = tile_dev + kidx * n_rows;
       const math_t* row_exp    = tile_host_all + widx * n_rows;
-      EXPECT_TRUE(devArrMatchHost(row_exp, cache_row, n_rows, raft::CompareApprox<math_t>(1e-6f)));
+      EXPECT_TRUE(
+        devArrMatchHost(row_exp, cache_row, n_rows, raft::CompareApprox<math_t>(1e-6f), stream));
     }
  }
 
   raft::handle_t handle;
   cublasHandle_t cublas_handle;
-  cudaStream_t stream = 0;
 
   int n_rows = 4;
   int n_cols = 2;
@@ -235,6 +236,7 @@ TYPED_TEST_CASE_P(KernelCacheTest);
 
 TYPED_TEST_P(KernelCacheTest, EvalTest)
 {
+  auto stream = this->handle.get_stream();
   std::vector<Matrix::KernelParams> param_vec{Matrix::KernelParams{Matrix::LINEAR,
3, 1, 0}, Matrix::KernelParams{Matrix::POLYNOMIAL, 2, 1.3, 1}, Matrix::KernelParams{Matrix::TANH, 2, 0.5, 2.4}, @@ -252,7 +254,8 @@ TYPED_TEST_P(KernelCacheTest, EvalTest) ASSERT_TRUE(devArrMatchHost(this->tile_host_expected, tile_dev, this->n_rows * this->n_ws, - raft::CompareApprox(1e-6f))); + raft::CompareApprox(1e-6f), + stream)); delete kernel; } } @@ -276,12 +279,13 @@ TYPED_TEST_P(KernelCacheTest, CacheEvalTest) TYPED_TEST_P(KernelCacheTest, SvrEvalTest) { + auto stream = this->handle.get_stream(); Matrix::KernelParams param{Matrix::LINEAR, 3, 1, 0}; float cache_size = sizeof(TypeParam) * this->n_rows * 32 / (1024.0 * 1024); this->n_ws = 6; int ws_idx_svr[6] = {0, 5, 1, 4, 3, 7}; - raft::update_device(this->ws_idx_dev, ws_idx_svr, 6, this->stream); + raft::update_device(this->ws_idx_dev, ws_idx_svr, 6, stream); Matrix::GramMatrixBase* kernel = Matrix::KernelFactory::create(param, this->handle.get_cublas_handle()); @@ -308,16 +312,9 @@ INSTANTIATE_TYPED_TEST_CASE_P(My, KernelCacheTest, FloatTypes); template class GetResultsTest : public ::testing::Test { protected: - void SetUp() override - { - CUDA_CHECK(cudaStreamCreate(&stream)); - handle.set_stream(stream); - } - - void TearDown() override { CUDA_CHECK(cudaStreamDestroy(stream)); } - void TestResults() { + auto stream = this->handle.get_stream(); rmm::device_uvector x_dev(n_rows * n_cols, stream); raft::update_device(x_dev.data(), x_host, n_rows * n_cols, stream); rmm::device_uvector f_dev(n_rows, stream); @@ -334,15 +331,15 @@ class GetResultsTest : public ::testing::Test { ASSERT_EQ(n_coefs, 7); math_t dual_coefs_exp[] = {-0.1, -0.2, -1.5, 0.2, 0.4, 1.5, 1.5}; - EXPECT_TRUE( - devArrMatchHost(dual_coefs_exp, dual_coefs, n_coefs, raft::CompareApprox(1e-6f))); + EXPECT_TRUE(devArrMatchHost( + dual_coefs_exp, dual_coefs, n_coefs, raft::CompareApprox(1e-6f), stream)); int idx_exp[] = {2, 3, 4, 6, 7, 8, 9}; - EXPECT_TRUE(devArrMatchHost(idx_exp, idx, n_coefs, raft::Compare())); + EXPECT_TRUE(devArrMatchHost(idx_exp, idx, n_coefs, raft::Compare(), stream)); math_t x_support_exp[] = {3, 4, 5, 7, 8, 9, 10, 13, 14, 15, 17, 18, 19, 20}; EXPECT_TRUE(devArrMatchHost( - x_support_exp, x_support, n_coefs * n_cols, raft::CompareApprox(1e-6f))); + x_support_exp, x_support, n_coefs * n_cols, raft::CompareApprox(1e-6f), stream)); EXPECT_FLOAT_EQ(b, -6.25f); @@ -368,7 +365,6 @@ class GetResultsTest : public ::testing::Test { math_t b; raft::handle_t handle; - cudaStream_t stream = 0; }; TYPED_TEST_CASE(GetResultsTest, FloatTypes); @@ -436,8 +432,7 @@ class SmoBlockSolverTest : public ::testing::Test { protected: void SetUp() override { - CUDA_CHECK(cudaStreamCreate(&stream)); - handle.set_stream(stream); + auto stream = this->handle.get_stream(); cublas_handle = handle.get_cublas_handle(); raft::allocate(ws_idx_dev, n_ws, stream); raft::allocate(y_dev, n_rows, stream); @@ -458,6 +453,7 @@ class SmoBlockSolverTest : public ::testing::Test { public: // because of the device lambda void testBlockSolve() { + auto stream = this->handle.get_stream(); SmoBlockSolve<<<1, n_ws, 0, stream>>>(y_dev, n_rows, alpha_dev, @@ -473,7 +469,7 @@ class SmoBlockSolverTest : public ::testing::Test { CUDA_CHECK(cudaPeekAtLastError()); math_t return_buff_exp[2] = {0.2, 1}; - devArrMatchHost(return_buff_exp, return_buff_dev, 2, raft::CompareApprox(1e-6)); + devArrMatchHost(return_buff_exp, return_buff_dev, 2, raft::CompareApprox(1e-6), stream); math_t* delta_alpha_calc; raft::allocate(delta_alpha_calc, n_rows, stream); @@ -484,16 +480,16 @@ class SmoBlockSolverTest : 
public ::testing::Test { n_rows, [] __device__(math_t a, math_t b) { return a * b; }, stream); - raft::devArrMatch(delta_alpha_dev, delta_alpha_calc, n_rows, raft::CompareApprox(1e-6)); + raft::devArrMatch( + delta_alpha_dev, delta_alpha_calc, n_rows, raft::CompareApprox(1e-6), stream); CUDA_CHECK(cudaFree(delta_alpha_calc)); math_t alpha_expected[] = {0, 0.1f, 0.1f, 0}; - raft::devArrMatch(alpha_expected, alpha_dev, n_rows, raft::CompareApprox(1e-6)); + raft::devArrMatch(alpha_expected, alpha_dev, n_rows, raft::CompareApprox(1e-6), stream); } protected: void TearDown() override { - CUDA_CHECK(cudaStreamDestroy(stream)); CUDA_CHECK(cudaFree(y_dev)); CUDA_CHECK(cudaFree(C_dev)); CUDA_CHECK(cudaFree(f_dev)); @@ -505,7 +501,6 @@ class SmoBlockSolverTest : public ::testing::Test { } raft::handle_t handle; - cudaStream_t stream = 0; cublasHandle_t cublas_handle; int n_rows = 4; @@ -626,11 +621,13 @@ void checkResults(SvmModel model, EXPECT_TRUE(devArrMatchHost(x_support_exp, model.x_support, model.n_support * model.n_cols, - raft::CompareApprox(1e-6f))); + raft::CompareApprox(1e-6f), + stream)); } if (idx_exp) { - EXPECT_TRUE(devArrMatchHost(idx_exp, model.support_idx, model.n_support, raft::Compare())); + EXPECT_TRUE( + devArrMatchHost(idx_exp, model.support_idx, model.n_support, raft::Compare(), stream)); } math_t* x_support_host = new math_t[model.n_support * model.n_cols]; @@ -667,8 +664,7 @@ class SmoSolverTest : public ::testing::Test { protected: void SetUp() override { - CUDA_CHECK(cudaStreamCreate(&stream)); - handle.set_stream(stream); + auto stream = handle.get_stream(); raft::allocate(x_dev, n_rows * n_cols, stream); raft::allocate(ws_idx_dev, n_ws, stream); raft::allocate(y_dev, n_rows, stream); @@ -706,7 +702,6 @@ class SmoSolverTest : public ::testing::Test { void TearDown() override { delete kernel; - CUDA_CHECK(cudaStreamDestroy(stream)); CUDA_CHECK(cudaFree(x_dev)); CUDA_CHECK(cudaFree(y_dev)); CUDA_CHECK(cudaFree(C_dev)); @@ -724,6 +719,7 @@ class SmoSolverTest : public ::testing::Test { public: void blockSolveTest() { + auto stream = this->handle.get_stream(); SmoBlockSolve<<<1, n_ws, 0, stream>>>(y_dev, n_rows, alpha_dev, @@ -753,12 +749,13 @@ class SmoSolverTest : public ::testing::Test { n_rows, [] __device__(math_t a, math_t b) { return a * b; }, stream); - raft::devArrMatch(delta_alpha_dev, delta_alpha_calc, n_rows, raft::CompareApprox(1e-6)); + raft::devArrMatch( + delta_alpha_dev, delta_alpha_calc, n_rows, raft::CompareApprox(1e-6), stream); CUDA_CHECK(cudaFree(delta_alpha_calc)); math_t alpha_expected[] = {0.6f, 0, 1, 1, 0, 0.6f}; // for C=10: {0.25f, 0, 2.25f, 3.75f, 0, 1.75f}; - raft::devArrMatch(alpha_expected, alpha_dev, n_rows, raft::CompareApprox(1e-6)); + raft::devArrMatch(alpha_expected, alpha_dev, n_rows, raft::CompareApprox(1e-6), stream); math_t host_alpha[6]; raft::update_host(host_alpha, alpha_dev, n_rows, stream); @@ -781,8 +778,9 @@ class SmoSolverTest : public ::testing::Test { void svrBlockSolveTest() { - int n_ws = 4; - int n_rows = 2; + auto stream = this->handle.get_stream(); + int n_ws = 4; + int n_rows = 2; // int n_cols = 1; // math_t x[2] = {1, 2}; // yr = {2, 3} @@ -816,15 +814,14 @@ class SmoSolverTest : public ::testing::Test { EXPECT_LT(return_buff[1], 10) << return_buff[1]; math_t alpha_exp[] = {0, 0.8, 0.8, 0}; - raft::devArrMatch(alpha_exp, alpha_dev, 4, raft::CompareApprox(1e-6)); + raft::devArrMatch(alpha_exp, alpha_dev, 4, raft::CompareApprox(1e-6), stream); math_t dalpha_exp[] = {-0.8, 0.8}; - raft::devArrMatch(dalpha_exp, 
delta_alpha_dev, 2, raft::CompareApprox(1e-6)); + raft::devArrMatch(dalpha_exp, delta_alpha_dev, 2, raft::CompareApprox(1e-6), stream); } protected: raft::handle_t handle; - cudaStream_t stream = 0; Matrix::GramMatrixBase* kernel; int n_rows = 6; const int n_cols = 2; @@ -879,6 +876,7 @@ std::ostream& operator<<(std::ostream& os, const smoInput& b) TYPED_TEST(SmoSolverTest, SmoSolveTest) { + auto stream = this->handle.get_stream(); std::vector, smoOutput>> data{ {smoInput{1, 0.001, KernelParams{LINEAR, 3, 1, 0}, 100, 1}, smoOutput{4, // n_sv @@ -917,13 +915,14 @@ TYPED_TEST(SmoSolverTest, SmoSolveTest) &model.b, p.max_iter, p.max_inner_iter); - checkResults(model, exp, this->stream); + checkResults(model, exp, stream); svmFreeBuffers(this->handle, model); } } TYPED_TEST(SmoSolverTest, SvcTest) { + auto stream = this->handle.get_stream(); std::vector, smoOutput2>> data{ {svcInput{1, 0.001, @@ -1016,19 +1015,20 @@ TYPED_TEST(SmoSolverTest, SvcTest) } SVC svc(this->handle, p.C, p.tol, p.kernel_params); svc.fit(p.x_dev, p.n_rows, p.n_cols, p.y_dev, sample_weights); - checkResults(svc.model, toSmoOutput(exp), this->stream); - rmm::device_uvector y_pred(p.n_rows, this->stream); + checkResults(svc.model, toSmoOutput(exp), stream); + rmm::device_uvector y_pred(p.n_rows, stream); if (p.predict) { svc.predict(p.x_dev, p.n_rows, p.n_cols, y_pred.data()); EXPECT_TRUE(raft::devArrMatch( - this->y_dev, y_pred.data(), p.n_rows, raft::CompareApprox(1e-6f))); + this->y_dev, y_pred.data(), p.n_rows, raft::CompareApprox(1e-6f), stream)); } if (exp.decision_function.size() > 0) { svc.decisionFunction(p.x_dev, p.n_rows, p.n_cols, y_pred.data()); EXPECT_TRUE(devArrMatchHost(exp.decision_function.data(), y_pred.data(), p.n_rows, - raft::CompareApprox(1e-3f))); + raft::CompareApprox(1e-3f), + stream)); } } } @@ -1109,6 +1109,7 @@ struct is_same_functor { TYPED_TEST(SmoSolverTest, BlobPredict) { + auto stream = this->handle.get_stream(); // Pair.second is the expected accuracy. It might change if the Rng changes. 
std::vector> data{ {blobInput{1, 0.001, KernelParams{LINEAR, 3, 1, 0}, 200, 10}, 98}, @@ -1123,16 +1124,16 @@ TYPED_TEST(SmoSolverTest, BlobPredict) auto p = d.first; SCOPED_TRACE(p); // explicit centers for the blobs - rmm::device_uvector centers(2 * p.n_cols, this->stream); + rmm::device_uvector centers(2 * p.n_cols, stream); thrust::device_ptr thrust_ptr(centers.data()); - thrust::fill(thrust::cuda::par.on(this->stream), thrust_ptr, thrust_ptr + p.n_cols, -5.0f); + thrust::fill(thrust::cuda::par.on(stream), thrust_ptr, thrust_ptr + p.n_cols, -5.0f); thrust::fill( - thrust::cuda::par.on(this->stream), thrust_ptr + p.n_cols, thrust_ptr + 2 * p.n_cols, +5.0f); + thrust::cuda::par.on(stream), thrust_ptr + p.n_cols, thrust_ptr + 2 * p.n_cols, +5.0f); - rmm::device_uvector x(p.n_rows * p.n_cols, this->stream); - rmm::device_uvector y(p.n_rows, this->stream); - rmm::device_uvector x_pred(n_pred * p.n_cols, this->stream); - rmm::device_uvector y_pred(n_pred, this->stream); + rmm::device_uvector x(p.n_rows * p.n_cols, stream); + rmm::device_uvector y(p.n_rows, stream); + rmm::device_uvector x_pred(n_pred * p.n_cols, stream); + rmm::device_uvector y_pred(n_pred, stream); make_blobs(this->handle, x.data(), y.data(), p.n_rows, p.n_cols, 2, centers.data()); SVC svc(this->handle, p.C, p.tol, p.kernel_params, 0, -1, 50, CUML_LEVEL_INFO); @@ -1140,18 +1141,18 @@ TYPED_TEST(SmoSolverTest, BlobPredict) // Create a different dataset for prediction make_blobs(this->handle, x_pred.data(), y_pred.data(), n_pred, p.n_cols, 2, centers.data()); - rmm::device_uvector y_pred2(n_pred, this->stream); + rmm::device_uvector y_pred2(n_pred, stream); svc.predict(x_pred.data(), n_pred, p.n_cols, y_pred2.data()); // Count the number of correct predictions - rmm::device_uvector is_correct(n_pred, this->stream); + rmm::device_uvector is_correct(n_pred, stream); thrust::device_ptr ptr1(y_pred.data()); thrust::device_ptr ptr2(y_pred2.data()); thrust::device_ptr ptr3(is_correct.data()); auto first = thrust::make_zip_iterator(thrust::make_tuple(ptr1, ptr2)); auto last = thrust::make_zip_iterator(thrust::make_tuple(ptr1 + n_pred, ptr2 + n_pred)); - thrust::transform(thrust::cuda::par.on(this->stream), first, last, ptr3, is_same_functor()); - int n_correct = thrust::reduce(thrust::cuda::par.on(this->stream), ptr3, ptr3 + n_pred); + thrust::transform(thrust::cuda::par.on(stream), first, last, ptr3, is_same_functor()); + int n_correct = thrust::reduce(thrust::cuda::par.on(stream), ptr3, ptr3 + n_pred); TypeParam accuracy = 100 * n_correct / n_pred; TypeParam accuracy_exp = d.second; @@ -1161,6 +1162,7 @@ TYPED_TEST(SmoSolverTest, BlobPredict) TYPED_TEST(SmoSolverTest, MemoryLeak) { + auto stream = this->handle.get_stream(); // We measure that we have the same amount of free memory available on the GPU // before and after we call SVM. This can help catch memory leaks, but it is // not 100% sure. 
Small allocations might be pooled together by cudaMalloc, @@ -1179,8 +1181,8 @@ TYPED_TEST(SmoSolverTest, MemoryLeak) auto p = d.first; SCOPED_TRACE(p); - rmm::device_uvector x(p.n_rows * p.n_cols, this->stream); - rmm::device_uvector y(p.n_rows, this->stream); + rmm::device_uvector x(p.n_rows * p.n_cols, stream); + rmm::device_uvector y(p.n_rows, stream); make_blobs(this->handle, x.data(), y.data(), p.n_rows, p.n_cols, 2); SVC svc(this->handle, p.C, p.tol, p.kernel_params); @@ -1190,8 +1192,8 @@ TYPED_TEST(SmoSolverTest, MemoryLeak) EXPECT_THROW(svc.fit(x.data(), p.n_rows, p.n_cols, y.data()), raft::exception); } else { svc.fit(x.data(), p.n_rows, p.n_cols, y.data()); - rmm::device_uvector y_pred(p.n_rows, this->stream); - CUDA_CHECK(cudaStreamSynchronize(this->stream)); + rmm::device_uvector y_pred(p.n_rows, stream); + CUDA_CHECK(cudaStreamSynchronize(stream)); CUDA_CHECK(cudaMemGetInfo(&free2, &total)); float delta = (free1 - free2); // Just to make sure that we measure any mem consumption at all: @@ -1200,7 +1202,7 @@ TYPED_TEST(SmoSolverTest, MemoryLeak) // it (one could additionally control the exec time by the max_iter arg to // SVC). EXPECT_GT(delta, p.n_rows * p.n_cols * 4); - CUDA_CHECK(cudaStreamSynchronize(this->stream)); + CUDA_CHECK(cudaStreamSynchronize(stream)); svc.predict(x.data(), p.n_rows, p.n_cols, y_pred.data()); } } @@ -1211,6 +1213,7 @@ TYPED_TEST(SmoSolverTest, MemoryLeak) TYPED_TEST(SmoSolverTest, DISABLED_MillionRows) { + auto stream = this->handle.get_stream(); if (sizeof(TypeParam) == 8) { GTEST_SKIP(); // Skip the test for double imput } else { @@ -1229,17 +1232,15 @@ TYPED_TEST(SmoSolverTest, DISABLED_MillionRows) auto p = d.first; SCOPED_TRACE(p); // explicit centers for the blobs - rmm::device_uvector centers(2 * p.n_cols, this->stream); + rmm::device_uvector centers(2 * p.n_cols, stream); thrust::device_ptr thrust_ptr(centers.data()); - thrust::fill(thrust::cuda::par.on(this->stream), thrust_ptr, thrust_ptr + p.n_cols, -5.0f); - thrust::fill(thrust::cuda::par.on(this->stream), - thrust_ptr + p.n_cols, - thrust_ptr + 2 * p.n_cols, - +5.0f); - - rmm::device_uvector x(p.n_rows * p.n_cols, this->stream); - rmm::device_uvector y(p.n_rows, this->stream); - rmm::device_uvector y_pred(p.n_rows, this->stream); + thrust::fill(thrust::cuda::par.on(stream), thrust_ptr, thrust_ptr + p.n_cols, -5.0f); + thrust::fill( + thrust::cuda::par.on(stream), thrust_ptr + p.n_cols, thrust_ptr + 2 * p.n_cols, +5.0f); + + rmm::device_uvector x(p.n_rows * p.n_cols, stream); + rmm::device_uvector y(p.n_rows, stream); + rmm::device_uvector y_pred(p.n_rows, stream); make_blobs(this->handle, x.data(), y.data(), p.n_rows, p.n_cols, 2, centers.data()); const int max_iter = 2; SVC svc( @@ -1275,8 +1276,7 @@ class SvrTest : public ::testing::Test { protected: void SetUp() override { - CUDA_CHECK(cudaStreamCreate(&stream)); - handle.set_stream(stream); + auto stream = handle.get_stream(); raft::allocate(x_dev, n_rows * n_cols, stream); raft::allocate(y_dev, n_rows, stream); raft::allocate(C_dev, 2 * n_rows, stream); @@ -1299,7 +1299,6 @@ class SvrTest : public ::testing::Test { void TearDown() override { - CUDA_CHECK(cudaStreamDestroy(stream)); CUDA_CHECK(cudaFree(x_dev)); CUDA_CHECK(cudaFree(y_dev)); CUDA_CHECK(cudaFree(C_dev)); @@ -1313,17 +1312,19 @@ class SvrTest : public ::testing::Test { public: void TestSvrInit() { + auto stream = this->handle.get_stream(); SvmParameter param = getDefaultSvmParameter(); param.svmType = EPSILON_SVR; SmoSolver smo(handle, param, nullptr); 
smo.SvrInit(y_dev, n_rows, yc, f); - EXPECT_TRUE(devArrMatchHost(yc_exp, yc, n_train, raft::CompareApprox(1.0e-9))); - EXPECT_TRUE(devArrMatchHost(f_exp, f, n_train, raft::Compare())); + EXPECT_TRUE(devArrMatchHost(yc_exp, yc, n_train, raft::CompareApprox(1.0e-9), stream)); + EXPECT_TRUE(devArrMatchHost(f_exp, f, n_train, raft::Compare(), stream)); } void TestSvrWorkingSet() { + auto stream = this->handle.get_stream(); init_C((math_t)1.0, C_dev, 2 * n_rows, stream); WorkingSet* ws; ws = new WorkingSet(handle, stream, n_rows, 20, EPSILON_SVR); @@ -1335,7 +1336,8 @@ class SvrTest : public ::testing::Test { ws->Select(f, alpha, yc, C_dev); int exp_idx[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13}; - ASSERT_TRUE(devArrMatchHost(exp_idx, ws->GetIndices(), ws->GetSize(), raft::Compare())); + ASSERT_TRUE( + devArrMatchHost(exp_idx, ws->GetIndices(), ws->GetSize(), raft::Compare(), stream)); delete ws; @@ -1343,12 +1345,14 @@ class SvrTest : public ::testing::Test { EXPECT_EQ(ws->GetSize(), 10); ws->Select(f, alpha, yc, C_dev); int exp_idx2[] = {6, 12, 5, 11, 3, 9, 8, 1, 7, 0}; - ASSERT_TRUE(devArrMatchHost(exp_idx2, ws->GetIndices(), ws->GetSize(), raft::Compare())); + ASSERT_TRUE( + devArrMatchHost(exp_idx2, ws->GetIndices(), ws->GetSize(), raft::Compare(), stream)); delete ws; } void TestSvrResults() { + auto stream = this->handle.get_stream(); raft::update_device(yc, yc_exp, n_train, stream); init_C((math_t)0.001, C_dev, n_rows * 2, stream); Results res(handle, x_dev, yc, n_rows, n_cols, C_dev, EPSILON_SVR); @@ -1366,19 +1370,23 @@ class SvrTest : public ::testing::Test { ASSERT_EQ(model.n_support, 5); math_t dc_exp[] = {0.1, 0.3, -0.4, 0.9, -0.9}; EXPECT_TRUE(devArrMatchHost( - dc_exp, model.dual_coefs, model.n_support, raft::CompareApprox(1.0e-6))); + dc_exp, model.dual_coefs, model.n_support, raft::CompareApprox(1.0e-6), stream)); math_t x_exp[] = {1, 2, 3, 5, 6}; - EXPECT_TRUE(devArrMatchHost( - x_exp, model.x_support, model.n_support * n_cols, raft::CompareApprox(1.0e-6))); + EXPECT_TRUE(devArrMatchHost(x_exp, + model.x_support, + model.n_support * n_cols, + raft::CompareApprox(1.0e-6), + stream)); int idx_exp[] = {0, 1, 2, 4, 5}; EXPECT_TRUE(devArrMatchHost( - idx_exp, model.support_idx, model.n_support, raft::CompareApprox(1.0e-6))); + idx_exp, model.support_idx, model.n_support, raft::CompareApprox(1.0e-6), stream)); } void TestSvrFitPredict() { + auto stream = this->handle.get_stream(); std::vector, smoOutput2>> data{ {SvrInput{ SvmParameter{1, 0, 1, 10, 1e-3, CUML_LEVEL_INFO, 0.1, EPSILON_SVR}, @@ -1485,17 +1493,17 @@ class SvrTest : public ::testing::Test { EXPECT_TRUE(devArrMatchHost(exp.decision_function.data(), preds.data(), p.n_rows, - raft::CompareApprox(1.0e-5))); + raft::CompareApprox(1.0e-5), + stream)); } } } protected: raft::handle_t handle; - cudaStream_t stream = 0; - int n_rows = 7; - int n_train = 2 * n_rows; - const int n_cols = 1; + int n_rows = 7; + int n_train = 2 * n_rows; + const int n_cols = 1; SvmModel model; math_t* x_dev; diff --git a/cpp/test/sg/tsvd_test.cu b/cpp/test/sg/tsvd_test.cu index f5bbc84071..2407302f33 100644 --- a/cpp/test/sg/tsvd_test.cu +++ b/cpp/test/sg/tsvd_test.cu @@ -50,7 +50,8 @@ class TsvdTest : public ::testing::TestWithParam> { protected: void basicTest() { - params = ::testing::TestWithParam>::GetParam(); + auto stream = handle.get_stream(); + params = ::testing::TestWithParam>::GetParam(); raft::random::Rng r(params.seed, raft::random::GenTaps); int len = params.len; @@ -85,7 +86,8 @@ class TsvdTest : public 
::testing::TestWithParam> { void advancedTest() { - params = ::testing::TestWithParam>::GetParam(); + auto stream = handle.get_stream(); + params = ::testing::TestWithParam>::GetParam(); raft::random::Rng r(params.seed, raft::random::GenTaps); int len = params.len2; @@ -126,8 +128,6 @@ class TsvdTest : public ::testing::TestWithParam> { void SetUp() override { - CUDA_CHECK(cudaStreamCreate(&stream)); - handle.set_stream(stream); basicTest(); advancedTest(); } @@ -145,7 +145,6 @@ class TsvdTest : public ::testing::TestWithParam> { CUDA_CHECK(cudaFree(explained_vars2)); CUDA_CHECK(cudaFree(explained_var_ratio2)); CUDA_CHECK(cudaFree(singular_vals2)); - CUDA_CHECK(cudaStreamDestroy(stream)); } protected: @@ -154,7 +153,6 @@ class TsvdTest : public ::testing::TestWithParam> { T *data2, *data2_trans, *data2_back, *components2, *explained_vars2, *explained_var_ratio2, *singular_vals2; raft::handle_t handle; - cudaStream_t stream = 0; }; const std::vector> inputsf2 = { @@ -175,7 +173,8 @@ TEST_P(TsvdTestLeftVecF, Result) ASSERT_TRUE(raft::devArrMatch(components, components_ref, (params.n_col * params.n_col), - raft::CompareApproxAbs(params.tolerance))); + raft::CompareApproxAbs(params.tolerance), + handle.get_stream())); } typedef TsvdTest TsvdTestLeftVecD; @@ -184,7 +183,8 @@ TEST_P(TsvdTestLeftVecD, Result) ASSERT_TRUE(raft::devArrMatch(components, components_ref, (params.n_col * params.n_col), - raft::CompareApproxAbs(params.tolerance))); + raft::CompareApproxAbs(params.tolerance), + handle.get_stream())); } typedef TsvdTest TsvdTestDataVecF; @@ -193,7 +193,8 @@ TEST_P(TsvdTestDataVecF, Result) ASSERT_TRUE(raft::devArrMatch(data2, data2_back, (params.n_col2 * params.n_col2), - raft::CompareApproxAbs(params.tolerance))); + raft::CompareApproxAbs(params.tolerance), + handle.get_stream())); } typedef TsvdTest TsvdTestDataVecD; @@ -202,7 +203,8 @@ TEST_P(TsvdTestDataVecD, Result) ASSERT_TRUE(raft::devArrMatch(data2, data2_back, (params.n_col2 * params.n_col2), - raft::CompareApproxAbs(params.tolerance))); + raft::CompareApproxAbs(params.tolerance), + handle.get_stream())); } INSTANTIATE_TEST_CASE_P(TsvdTests, TsvdTestLeftVecF, ::testing::ValuesIn(inputsf2)); diff --git a/python/cuml/common/base.pyx b/python/cuml/common/base.pyx index a84bf0eaba..4b0873a3d7 100644 --- a/python/cuml/common/base.pyx +++ b/python/cuml/common/base.pyx @@ -143,8 +143,7 @@ class Base(TagsMixin, # stream and handle example: stream = cuml.cuda.Stream() - handle = cuml.Handle() - handle.setStream(stream) + handle = cuml.Handle(stream=stream) algo = MyAlgo(handle=handle) algo.fit(...) @@ -160,13 +159,14 @@ class Base(TagsMixin, def __init__(self, *, handle=None, verbose=False, - output_type=None): + output_type=None, + handle_kwargs=None): """ Constructor. All children must call init method of this base class. """ - self.handle = cuml.raft.common.handle.Handle() if handle is None \ - else handle + self.handle = cuml.raft.common.handle.Handle(**handle_kwargs) \ + if handle is None else handle # Internally, self.verbose follows the spdlog/c++ standard of # 0 is most logging, and logging decreases from there. 
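The `Handle(stream=stream)` form in the docstring above mirrors the C++ side of this series: stream selection moves from a post-construction `set_stream()`/`setStream()` call to a constructor argument. A minimal C++ sketch of that idiom — the header paths and pool size are assumptions; the constructor shapes are the ones exercised in these diffs:

```cpp
#include <memory>

#include <cuda_runtime_api.h>
#include <raft/handle.hpp>           // assumed header for raft::handle_t in this RAFT version
#include <rmm/cuda_stream.hpp>
#include <rmm/cuda_stream_pool.hpp>

int main()
{
  // The stream is fixed at construction; there is no set_stream() to call afterwards.
  rmm::cuda_stream stream;                // owning RAII stream
  raft::handle_t handle{stream.view()};   // work issued via `handle` is ordered on `stream`

  // Algorithms that want worker streams get a pool attached up front as well.
  auto pool = std::make_shared<rmm::cuda_stream_pool>(4);  // pool size is illustrative
  raft::handle_t pooled_handle{stream.view(), pool};

  // Consumers query the stream instead of mutating it.
  cudaStream_t raw = handle.get_stream();  // cuda_stream_view converts to cudaStream_t
  (void)raw;
  return 0;
}
```

`cuml.Handle(stream=..., n_streams=...)`, as used in the `.pyx` and test hunks below, is the Python wrapper over these same constructors.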
diff --git a/python/cuml/ensemble/randomforest_common.pyx b/python/cuml/ensemble/randomforest_common.pyx index 7e7a6b1dc8..f69b123cb5 100644 --- a/python/cuml/ensemble/randomforest_common.pyx +++ b/python/cuml/ensemble/randomforest_common.pyx @@ -111,7 +111,7 @@ class BaseRandomForestModel(Base): "due to stream/thread timing differences, even when " "random_state is set") if handle is None: - handle = Handle(n_streams) + handle = Handle(n_streams=n_streams) super(BaseRandomForestModel, self).__init__( handle=handle, @@ -145,7 +145,7 @@ class BaseRandomForestModel(Base): self.dtype = dtype self.accuracy_metric = accuracy_metric self.max_batch_size = max_batch_size - self.n_streams = handle.getNumInternalStreams() + self.n_streams = n_streams self.random_state = random_state self.rf_forest = 0 self.rf_forest64 = 0 diff --git a/python/cuml/linear_model/linear_regression.pyx b/python/cuml/linear_model/linear_regression.pyx index a4023e9307..d334fa9a30 100644 --- a/python/cuml/linear_model/linear_regression.pyx +++ b/python/cuml/linear_model/linear_regression.pyx @@ -209,7 +209,7 @@ class LinearRegression(Base, if handle is None and algorithm == 'eig': # if possible, create two streams, so that eigenvalue decomposition # can benefit from running independent operations concurrently. - handle = Handle(2) + handle = Handle(n_streams=2) super().__init__(handle=handle, verbose=verbose, output_type=output_type) diff --git a/python/cuml/random_projection/random_projection.pyx b/python/cuml/random_projection/random_projection.pyx index 0ab6d9a99a..0d7f18f022 100644 --- a/python/cuml/random_projection/random_projection.pyx +++ b/python/cuml/random_projection/random_projection.pyx @@ -29,6 +29,8 @@ from cuml.raft.common.handle cimport * from cuml.common import input_to_cuml_array from cuml.common.mixins import FMajorInputTagMixin +from rmm._lib.cuda_stream_view cimport cuda_stream_view + cdef extern from * nogil: ctypedef void* _Stream "cudaStream_t" @@ -47,7 +49,7 @@ cdef extern from "cuml/random_projection/rproj_c.h" namespace "ML": # Structure describing random matrix cdef cppclass rand_mat[T]: - rand_mat(_Stream stream) except + # random matrix structure constructor (set all to nullptr) # noqa E501 + rand_mat(cuda_stream_view stream) except + # random matrix structure constructor (set all to nullptr) # noqa E501 T *dense_data # dense random matrix data int *indices # sparse CSC random matrix indices int *indptr # sparse CSC random matrix indptr @@ -162,7 +164,7 @@ cdef class BaseRandomProjection(): random_state=None): cdef handle_t* handle_ = self.handle.getHandle() - cdef _Stream stream = handle_.get_stream() + cdef cuda_stream_view stream = handle_.get_stream() self.rand_matS = new rand_mat[float](stream) self.rand_matD = new rand_mat[double](stream) diff --git a/python/cuml/test/test_base.py b/python/cuml/test/test_base.py index 433cc118f7..ed7c445e2c 100644 --- a/python/cuml/test/test_base.py +++ b/python/cuml/test/test_base.py @@ -40,9 +40,8 @@ def test_base_class_usage(): def test_base_class_usage_with_handle(): - handle = cuml.Handle() stream = cuml.cuda.Stream() - handle.setStream(stream) + handle = cuml.Handle(stream=stream) base = cuml.Base(handle=handle) base.handle.sync() del base diff --git a/python/cuml/test/test_svm.py b/python/cuml/test/test_svm.py index a55b841c6d..8c17794c0c 100644 --- a/python/cuml/test/test_svm.py +++ b/python/cuml/test/test_svm.py @@ -509,8 +509,7 @@ def test_svm_memleak(params, n_rows, n_iter, n_cols, """ X_train, X_test, y_train, y_test = make_dataset(dataset, 
n_rows, n_cols) stream = cuml.cuda.Stream() - handle = cuml.Handle() - handle.setStream(stream) + handle = cuml.Handle(stream=stream) # Warmup. Some modules that are used in SVC allocate space on the device # and consume memory. Here we make sure that this allocation is done # before the first call to get_memory_info. @@ -559,8 +558,7 @@ def test_svm_memleak_on_exception(params, n_rows=1000, n_iter=10, random_state=137, centers=2) X_train = X_train.astype(np.float32) stream = cuml.cuda.Stream() - handle = cuml.Handle() - handle.setStream(stream) + handle = cuml.Handle(stream=stream) # Warmup. Some modules that are used in SVC allocate space on the device # and consume memory. Here we make sure that this allocation is done diff --git a/python/cuml/test/utils.py b/python/cuml/test/utils.py index 51318230ea..fb904be3ca 100644 --- a/python/cuml/test/utils.py +++ b/python/cuml/test/utils.py @@ -171,9 +171,8 @@ def sqnorm(x): def get_handle(use_handle, n_streams=0): if not use_handle: return None, None - h = cuml.Handle(n_streams) s = cuml.cuda.Stream() - h.setStream(s) + h = cuml.Handle(stream=s, n_streams=n_streams) return h, s diff --git a/wiki/cpp/DEVELOPER_GUIDE.md b/wiki/cpp/DEVELOPER_GUIDE.md index bc88e4da54..faa041feec 100644 --- a/wiki/cpp/DEVELOPER_GUIDE.md +++ b/wiki/cpp/DEVELOPER_GUIDE.md @@ -353,11 +353,9 @@ When multiple streams are needed, e.g. to manage a pipeline, use the internal st ```cpp void foo(const double* const srcdata, double* const result) { - raft::handle_t raftHandle; - cudaStream_t stream; CUDA_RT_CALL( cudaStreamCreate( &stream ) ); - raftHandle.set_stream( stream ); + raft::handle_t raftHandle( stream ); ... From d1b79c2710b6aae258abc3847982afd67396f307 Mon Sep 17 00:00:00 2001 From: divyegala Date: Thu, 21 Oct 2021 13:41:44 -0700 Subject: [PATCH 02/18] trying to correct build in ci --- cpp/cmake/thirdparty/get_raft.cmake | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cpp/cmake/thirdparty/get_raft.cmake b/cpp/cmake/thirdparty/get_raft.cmake index 2a24b090d5..a7581796ac 100644 --- a/cpp/cmake/thirdparty/get_raft.cmake +++ b/cpp/cmake/thirdparty/get_raft.cmake @@ -30,8 +30,8 @@ function(find_and_configure_raft) BUILD_EXPORT_SET cuml-exports INSTALL_EXPORT_SET cuml-exports CPM_ARGS - GIT_REPOSITORY https://github.com/divyegala/raft.git - GIT_TAG imp-21.10-handle_stream + GIT_REPOSITORY https://github.com/${PKG_FORK}/raft.git + GIT_TAG ${PKG_PINNED_TAG} SOURCE_SUBDIR cpp OPTIONS "BUILD_TESTS OFF" @@ -56,6 +56,6 @@ set(CUML_BRANCH_VERSION_raft "${CUML_VERSION_MAJOR}.${CUML_VERSION_MINOR}") # To use a different RAFT locally, set the CMake variable # CPM_raft_SOURCE=/path/to/local/raft find_and_configure_raft(VERSION ${CUML_MIN_VERSION_raft} - FORK rapidsai - PINNED_TAG branch-${CUML_BRANCH_VERSION_raft} - ) \ No newline at end of file + FORK https://github.com/divyegala/raft.git + PINNED_TAG imp-21.10-handle_stream + ) From b237ed72a7d802e32a419933dead8754c40472b7 Mon Sep 17 00:00:00 2001 From: divyegala Date: Thu, 21 Oct 2021 13:42:32 -0700 Subject: [PATCH 03/18] minor mistake --- cpp/cmake/thirdparty/get_raft.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/cmake/thirdparty/get_raft.cmake b/cpp/cmake/thirdparty/get_raft.cmake index a7581796ac..b874fd7093 100644 --- a/cpp/cmake/thirdparty/get_raft.cmake +++ b/cpp/cmake/thirdparty/get_raft.cmake @@ -56,6 +56,6 @@ set(CUML_BRANCH_VERSION_raft "${CUML_VERSION_MAJOR}.${CUML_VERSION_MINOR}") # To use a different RAFT locally, set the CMake 
variable # CPM_raft_SOURCE=/path/to/local/raft find_and_configure_raft(VERSION ${CUML_MIN_VERSION_raft} - FORK https://github.com/divyegala/raft.git + FORK divyegala PINNED_TAG imp-21.10-handle_stream ) From 6ea0914f7678da72aef1b57388b13c0785144dab Mon Sep 17 00:00:00 2001 From: divyegala Date: Thu, 21 Oct 2021 15:18:04 -0700 Subject: [PATCH 04/18] correcting default unpacking arg --- python/cuml/common/base.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cuml/common/base.pyx b/python/cuml/common/base.pyx index 4b0873a3d7..eaa09903a0 100644 --- a/python/cuml/common/base.pyx +++ b/python/cuml/common/base.pyx @@ -160,7 +160,7 @@ class Base(TagsMixin, handle=None, verbose=False, output_type=None, - handle_kwargs=None): + handle_kwargs={}): """ Constructor. All children must call init method of this base class. From 162f4ab991deae7f8f4af581fe779adc441d11dc Mon Sep 17 00:00:00 2001 From: divyegala Date: Thu, 28 Oct 2021 14:41:46 -0700 Subject: [PATCH 05/18] fix bad merge --- cpp/bench/prims/fused_l2_nn.cu | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/bench/prims/fused_l2_nn.cu b/cpp/bench/prims/fused_l2_nn.cu index bd011cb145..285525af68 100644 --- a/cpp/bench/prims/fused_l2_nn.cu +++ b/cpp/bench/prims/fused_l2_nn.cu @@ -44,8 +44,7 @@ struct FusedL2NN : public Fixture { alloc(out, params.m); alloc(workspace, params.m); raft::random::Rng r(123456ULL); - raft::handle_t handle; - handle.set_stream(stream); + raft::handle_t handle {stream}; r.uniform(x, params.m * params.k, T(-1.0), T(1.0), stream); r.uniform(y, params.n * params.k, T(-1.0), T(1.0), stream); From e3f1a1796986a969e8f69212eb71b271fed96e4a Mon Sep 17 00:00:00 2001 From: divyegala Date: Thu, 28 Oct 2021 14:52:59 -0700 Subject: [PATCH 06/18] style check --- cpp/bench/prims/fused_l2_nn.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/bench/prims/fused_l2_nn.cu b/cpp/bench/prims/fused_l2_nn.cu index 285525af68..c497f32b04 100644 --- a/cpp/bench/prims/fused_l2_nn.cu +++ b/cpp/bench/prims/fused_l2_nn.cu @@ -44,7 +44,7 @@ struct FusedL2NN : public Fixture { alloc(out, params.m); alloc(workspace, params.m); raft::random::Rng r(123456ULL); - raft::handle_t handle {stream}; + raft::handle_t handle{stream}; r.uniform(x, params.m * params.k, T(-1.0), T(1.0), stream); r.uniform(y, params.n * params.k, T(-1.0), T(1.0), stream); From 871fa6dcfa3e43936e390bc6a46edcb252690db4 Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 5 Nov 2021 11:15:25 -0700 Subject: [PATCH 07/18] fixing kl div tsne pairwise_distance call --- cpp/test/sg/rproj_test.cu | 2 +- cpp/test/sg/tsne_test.cu | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/test/sg/rproj_test.cu b/cpp/test/sg/rproj_test.cu index dc6ad9518b..353d6a0d22 100644 --- a/cpp/test/sg/rproj_test.cu +++ b/cpp/test/sg/rproj_test.cu @@ -125,7 +125,7 @@ class RPROJTest : public ::testing::Test { void random_matrix_check() { - size_t D = johnson_lindenstrauss_min_dim(N, epsilon); + int D = johnson_lindenstrauss_min_dim(N, epsilon); ASSERT_TRUE(params1->n_components == D); ASSERT_TRUE(random_matrix1->dense_data.size() > 0); diff --git a/cpp/test/sg/tsne_test.cu b/cpp/test/sg/tsne_test.cu index a25c4d8e4d..b72be5cfd7 100644 --- a/cpp/test/sg/tsne_test.cu +++ b/cpp/test/sg/tsne_test.cu @@ -148,7 +148,7 @@ class TSNETest : public ::testing::TestWithParam { n, model_params.dim, raft::distance::DistanceType::L2Expanded, - stream); + false); CUDA_CHECK(cudaStreamSynchronize(stream)); // Compute theorical KL div 
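The two test files above receive the same mechanical rewrite that the earlier `sgd.cu`, `svc_test.cu`, and `tsvd_test.cu` hunks apply: fixtures stop owning a `cudaStream_t` (no `cudaStreamCreate`/`cudaStreamDestroy` pair, no `handle.set_stream()`) and instead borrow the handle's stream, while `devArrMatch`/`devArrMatchHost` take that stream explicitly. A minimal sketch of the fixture shape this converges on — the test name and buffer setup are hypothetical:

```cpp
#include <gtest/gtest.h>

#include <raft/handle.hpp>  // assumed header for raft::handle_t

class ExampleTest : public ::testing::Test {
 protected:
  void SetUp() override
  {
    // Borrow the stream the handle was constructed with; nothing to create here.
    auto stream = handle.get_stream();
    // ... allocate device buffers and enqueue setup work on `stream` ...
    (void)stream;
  }

  // No stream teardown: the stream's lifetime is tied to the handle.
  raft::handle_t handle;
};

TEST_F(ExampleTest, Result)
{
  // Device comparisons are stream-ordered now, so the stream is passed explicitly:
  //   ASSERT_TRUE(raft::devArrMatch(
  //     ref, out, len, raft::CompareApprox<float>(1e-6f), handle.get_stream()));
}
```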
From 57fbc82eb4b33609fcda5672fd7be2dedd0ab019 Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 5 Nov 2021 16:15:38 -0700 Subject: [PATCH 08/18] fixing failing pytests --- python/cuml/common/base.pyx | 7 +++---- python/cuml/manifold/t_sne.pyx | 3 +-- python/cuml/test/test_random_forest.py | 4 +++- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/python/cuml/common/base.pyx b/python/cuml/common/base.pyx index eaa09903a0..07ee9db733 100644 --- a/python/cuml/common/base.pyx +++ b/python/cuml/common/base.pyx @@ -159,14 +159,13 @@ class Base(TagsMixin, def __init__(self, *, handle=None, verbose=False, - output_type=None, - handle_kwargs={}): + output_type=None): """ Constructor. All children must call init method of this base class. """ - self.handle = cuml.raft.common.handle.Handle(**handle_kwargs) \ - if handle is None else handle + self.handle = cuml.raft.common.handle.Handle() if handle is None \ + else handle # Internally, self.verbose follows the spdlog/c++ standard of # 0 is most logging, and logging decreases from there. diff --git a/python/cuml/manifold/t_sne.pyx b/python/cuml/manifold/t_sne.pyx index b47a077ff5..dabb9d9f4a 100644 --- a/python/cuml/manifold/t_sne.pyx +++ b/python/cuml/manifold/t_sne.pyx @@ -533,8 +533,7 @@ class TSNE(Base, free(params) self._kl_divergence_ = kl_divergence - if self.verbose: - print("[t-SNE] KL divergence: {}".format(kl_divergence)) + logger.debug("[TSNE] KL Divergence: %f" % kl_divergence) return self @generate_docstring(convert_dtype_cast='np.float32', diff --git a/python/cuml/test/test_random_forest.py b/python/cuml/test/test_random_forest.py index ec2dcefe8c..50c40fdb69 100644 --- a/python/cuml/test/test_random_forest.py +++ b/python/cuml/test/test_random_forest.py @@ -552,7 +552,8 @@ def rf_classification( ) X_test = X_test.astype(datatype[1]) - handle, stream = get_handle(True, n_streams=1) + n_streams = 1 + handle, stream = get_handle(True, n_streams=n_streams) # Initialize, fit and predict using cuML's # random forest classification model cuml_model = curfc( @@ -566,6 +567,7 @@ def rf_classification( handle=handle, max_leaves=-1, max_depth=16, + n_streams=n_streams, ) if array_type == "dataframe": X_train_df = cudf.DataFrame(X_train) From 5bfef7f583a2791e18ee174a38087bd9c814f8d6 Mon Sep 17 00:00:00 2001 From: divyegala Date: Tue, 7 Dec 2021 12:29:27 -0800 Subject: [PATCH 09/18] adding testing label --- ci/cpu/build.sh | 8 ++++---- ci/gpu/build.sh | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh index b923795dc0..0972499327 100755 --- a/ci/cpu/build.sh +++ b/ci/cpu/build.sh @@ -74,12 +74,12 @@ BUILD_CUML=1 if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then if [ "$BUILD_LIBCUML" == '1' -o "$BUILD_CUML" == '1' ]; then gpuci_logger "Build conda pkg for libcuml" - gpuci_conda_retry build --no-build-id --croot ${CONDA_BLD_DIR} conda/recipes/libcuml + gpuci_conda_retry build --no-build-id --croot ${CONDA_BLD_DIR} -c rapidsai-nightly/label/testing conda/recipes/libcuml fi else if [ "$BUILD_LIBCUML" == '1' ]; then gpuci_logger "PROJECT FLASH: Build conda pkg for libcuml" - gpuci_conda_retry build --no-build-id --croot ${CONDA_BLD_DIR} conda/recipes/libcuml --dirty --no-remove-work-dir + gpuci_conda_retry build --no-build-id --croot ${CONDA_BLD_DIR} -c rapidsai-nightly/label/testing conda/recipes/libcuml --dirty --no-remove-work-dir mkdir -p ${CONDA_BLD_DIR}/libcuml/work cp -r ${CONDA_BLD_DIR}/work/* ${CONDA_BLD_DIR}/libcuml/work rm -rf ${CONDA_BLD_DIR}/work @@ -89,10 
+89,10 @@ fi if [ "$BUILD_CUML" == '1' ]; then if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then gpuci_logger "Build conda pkg for cuml" - gpuci_conda_retry build --croot ${CONDA_BLD_DIR} conda/recipes/cuml --python=${PYTHON} + gpuci_conda_retry build --croot ${CONDA_BLD_DIR} -c rapidsai-nightly/label/testing conda/recipes/cuml --python=${PYTHON} else gpuci_logger "PROJECT FLASH: Build conda pkg for cuml" - gpuci_conda_retry build --no-build-id --croot ${CONDA_BLD_DIR} conda/recipes/cuml -c $CONDA_BLD_DIR --dirty --no-remove-work-dir --python=${PYTHON} + gpuci_conda_retry build --no-build-id --croot ${CONDA_BLD_DIR} -c rapidsai-nightly/label/testing conda/recipes/cuml -c $CONDA_BLD_DIR --dirty --no-remove-work-dir --python=${PYTHON} mkdir -p ${CONDA_BLD_DIR}/cuml/work cp -r ${CONDA_BLD_DIR}/work/* ${CONDA_BLD_DIR}/cuml/work rm -rf ${CONDA_BLD_DIR}/work diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 505ef4c6ca..4fb22700b3 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -46,7 +46,7 @@ gpuci_logger "Activate conda env" conda activate rapids gpuci_logger "Install dependencies" -gpuci_mamba_retry install -c conda-forge -c rapidsai -c rapidsai-nightly -c nvidia \ +gpuci_mamba_retry install -c conda-forge -c rapidsai -c rapidsai-nightly/label/testing -c rapidsai-nightly -c nvidia \ "cudatoolkit=${CUDA_REL}" \ "cudf=${MINOR_VERSION}" \ "rmm=${MINOR_VERSION}" \ @@ -190,7 +190,7 @@ else CONDA_FILE=`basename "$CONDA_FILE" .tar.bz2` #get filename without extension CONDA_FILE=${CONDA_FILE//-/=} #convert to conda install gpuci_logger "Installing $CONDA_FILE" - gpuci_mamba_retry install -c ${CONDA_ARTIFACT_PATH} "$CONDA_FILE" + gpuci_mamba_retry install -c rapidsai-nightly/label/testing -c ${CONDA_ARTIFACT_PATH} "$CONDA_FILE" # FIXME: Project FLASH only builds for python version 3.7 which is the one used in # the CUDA 11.0 job, need to change all versions to project flash @@ -200,7 +200,7 @@ else CONDA_FILE=`basename "$CONDA_FILE" .tar.bz2` #get filename without extension CONDA_FILE=${CONDA_FILE//-/=} #convert to conda install echo "Installing $CONDA_FILE" - gpuci_mamba_retry install -c ${CONDA_ARTIFACT_PATH} "$CONDA_FILE" + gpuci_mamba_retry install -c rapidsai-nightly/label/testing -c ${CONDA_ARTIFACT_PATH} "$CONDA_FILE" else gpuci_logger "Building cuml python in gpu job" From f6495466f6dbcd119bd01547c54864c1105da22d Mon Sep 17 00:00:00 2001 From: divyegala Date: Tue, 7 Dec 2021 14:50:07 -0800 Subject: [PATCH 10/18] correcting handle usage in svm --- cpp/src/svm/linear.cu | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cpp/src/svm/linear.cu b/cpp/src/svm/linear.cu index be5401f211..4c78b99717 100644 --- a/cpp/src/svm/linear.cu +++ b/cpp/src/svm/linear.cu @@ -304,7 +304,7 @@ class WorkerHandle { : handle_ptr(new raft::handle_t(h, stream_id, 0)), stream_id(stream_id), handle(*handle_ptr), - stream(h.get_internal_stream(stream_id)) + stream(h.get_next_usable_stream(stream_id)) { } @@ -322,7 +322,7 @@ LinearSVMModel LinearSVMModel::allocate(const raft::handle_t& handle, const std::size_t nCols, const std::size_t nClasses) { - auto stream = handle.get_stream_view(); + auto stream = handle.get_stream(); auto res = rmm::mr::get_current_device_resource(); const std::size_t coefRows = nCols + params.fit_intercept; const std::size_t coefCols = nClasses <= 2 ? 
1 : nClasses; @@ -340,7 +340,7 @@ LinearSVMModel LinearSVMModel::allocate(const raft::handle_t& handle, template void LinearSVMModel::free(const raft::handle_t& handle, LinearSVMModel& model) { - auto stream = handle.get_stream_view(); + auto stream = handle.get_stream(); auto res = rmm::mr::get_current_device_resource(); const std::size_t coefRows = model.coefRows; const std::size_t coefCols = model.coefCols(); @@ -427,7 +427,7 @@ LinearSVMModel LinearSVMModel::fit(const raft::handle_t& handle, // one-vs-rest logic goes over each class std::vector targets(coefCols); std::vector num_iters(coefCols); - const int n_streams = coefCols > 1 ? handle.get_num_internal_streams() : 1; + const int n_streams = coefCols > 1 ? handle.get_stream_pool_size() : 1; bool parallel = n_streams > 1; #pragma omp parallel for num_threads(n_streams) if (parallel) for (int class_i = 0; class_i < coefCols; class_i++) { @@ -496,7 +496,7 @@ LinearSVMModel LinearSVMModel::fit(const raft::handle_t& handle, worker.stream, (T*)sampleWeight); } - if (parallel) handle.wait_on_internal_streams(); + if (parallel) handle.sync_stream_pool(); if (coefCols > 1) { raft::linalg::transpose(handle, w1, model.w, coefRows, coefCols, stream); @@ -517,7 +517,7 @@ void LinearSVMModel::predict(const raft::handle_t& handle, const std::size_t nCols, T* out) { - auto stream = handle.get_stream_view(); + auto stream = handle.get_stream(); const auto coefCols = model.coefCols(); if (isRegression(params.loss)) return predictLinear( @@ -547,7 +547,7 @@ void LinearSVMModel::predictProba(const raft::handle_t& handle, ASSERT(model.probScale != nullptr, "The model was not trained to output probabilities (model.probScale == nullptr)."); - auto stream = handle.get_stream_view(); + auto stream = handle.get_stream(); const auto coefCols = model.coefCols(); rmm::device_uvector temp(nRows * coefCols, stream); From 4993df93469106b3c231ca2e4a1c0ee1004a81bb Mon Sep 17 00:00:00 2001 From: Divye Gala Date: Tue, 7 Dec 2021 16:41:48 -0800 Subject: [PATCH 11/18] One more update to linear.cu in svm --- cpp/src/svm/linear.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/svm/linear.cu b/cpp/src/svm/linear.cu index 4c78b99717..23faaa54c5 100644 --- a/cpp/src/svm/linear.cu +++ b/cpp/src/svm/linear.cu @@ -301,7 +301,7 @@ class WorkerHandle { } WorkerHandle(const raft::handle_t& h, int stream_id) - : handle_ptr(new raft::handle_t(h, stream_id, 0)), + : handle_ptr(new raft::handle_t(h.get_next_usable_stream(stream_id)), stream_id(stream_id), handle(*handle_ptr), stream(h.get_next_usable_stream(stream_id)) From b563f274395327343ff9f44cea6268450c8545a7 Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 8 Dec 2021 10:39:04 -0800 Subject: [PATCH 12/18] correcting parentheses --- cpp/src/svm/linear.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/svm/linear.cu b/cpp/src/svm/linear.cu index 23faaa54c5..c6d36d08b6 100644 --- a/cpp/src/svm/linear.cu +++ b/cpp/src/svm/linear.cu @@ -301,7 +301,7 @@ class WorkerHandle { } WorkerHandle(const raft::handle_t& h, int stream_id) - : handle_ptr(new raft::handle_t(h.get_next_usable_stream(stream_id)), + : handle_ptr{new raft::handle_t{h.get_next_usable_stream(stream_id)}}, stream_id(stream_id), handle(*handle_ptr), stream(h.get_next_usable_stream(stream_id)) From 311c16f9a53690b70b08398448131a7f6bf394c8 Mon Sep 17 00:00:00 2001 From: divyegala Date: Wed, 8 Dec 2021 12:25:31 -0800 Subject: [PATCH 13/18] removing set stream --- cpp/examples/symreg/symreg_example.cpp | 5 +---- 
cpp/test/sg/genetic/evolution_test.cu | 5 ----- cpp/test/sg/genetic/program_test.cu | 5 ----- 3 files changed, 1 insertion(+), 14 deletions(-) diff --git a/cpp/examples/symreg/symreg_example.cpp b/cpp/examples/symreg/symreg_example.cpp index a33d9af8bf..860effb542 100644 --- a/cpp/examples/symreg/symreg_example.cpp +++ b/cpp/examples/symreg/symreg_example.cpp @@ -198,12 +198,10 @@ int main(int argc, char* argv[]) /* ======================= Begin GPU memory allocation ======================= */ std::cout << "***************************************" << std::endl; - raft::handle_t handle; std::shared_ptr allocator(new raft::mr::device::default_allocator()); cudaStream_t stream; - CUDA_RT_CALL(cudaStreamCreate(&stream)); - handle.set_stream(stream); + raft::handle_t handle{stream}; // Begin recording time cudaEventRecord(start, stream); @@ -342,6 +340,5 @@ int main(int argc, char* argv[]) raft::deallocate(d_finalprogs, stream); CUDA_RT_CALL(cudaEventDestroy(start)); CUDA_RT_CALL(cudaEventDestroy(stop)); - CUDA_RT_CALL(cudaStreamDestroy(stream)); return 0; } diff --git a/cpp/test/sg/genetic/evolution_test.cu b/cpp/test/sg/genetic/evolution_test.cu index 0f718120c8..910089ad7b 100644 --- a/cpp/test/sg/genetic/evolution_test.cu +++ b/cpp/test/sg/genetic/evolution_test.cu @@ -53,8 +53,6 @@ class GeneticEvolutionTest : public ::testing::Test { void SetUp() override { ML::Logger::get().setLevel(CUML_LEVEL_INFO); - CUDA_CHECK(cudaStreamCreate(&stream)); - handle.set_stream(stream); // Set training param vals hyper_params.population_size = 5000; @@ -113,10 +111,7 @@ class GeneticEvolutionTest : public ::testing::Test { stream)); } - void TearDown() override { CUDA_CHECK(cudaStreamDestroy(stream)); } - raft::handle_t handle; - cudaStream_t stream; param hyper_params; // Some mini-dataset constants diff --git a/cpp/test/sg/genetic/program_test.cu b/cpp/test/sg/genetic/program_test.cu index 1e2d3225df..ace14ebd3d 100644 --- a/cpp/test/sg/genetic/program_test.cu +++ b/cpp/test/sg/genetic/program_test.cu @@ -49,9 +49,6 @@ class GeneticProgramTest : public ::testing::Test { protected: void SetUp() override { - CUDA_CHECK(cudaStreamCreate(&stream)); - handle.set_stream(stream); - // Params hyper_params.population_size = 2; hyper_params.random_state = 123; @@ -155,11 +152,9 @@ class GeneticProgramTest : public ::testing::Test { rmm::mr::get_current_device_resource()->deallocate(d_nodes1, 7 * sizeof(node), stream); rmm::mr::get_current_device_resource()->deallocate(d_nodes2, 7 * sizeof(node), stream); rmm::mr::get_current_device_resource()->deallocate(d_progs, 2 * sizeof(program), stream); - CUDA_CHECK(cudaStreamDestroy(stream)); } raft::handle_t handle; - cudaStream_t stream; const int n_cols = 3; const int n_progs = 2; const int n_samples = 25; From 24908b76d9e7e95ac8e71cb500e29095bdf007d3 Mon Sep 17 00:00:00 2001 From: divyegala Date: Thu, 9 Dec 2021 12:46:15 -0800 Subject: [PATCH 14/18] successful compilation --- cpp/test/sg/genetic/evolution_test.cu | 4 +++- cpp/test/sg/genetic/program_test.cu | 4 +++- cpp/test/sg/linear_svm_test.cu | 6 +++--- python/cuml/svm/linear.pyx | 14 +++++++++----- 4 files changed, 18 insertions(+), 10 deletions(-) diff --git a/cpp/test/sg/genetic/evolution_test.cu b/cpp/test/sg/genetic/evolution_test.cu index 910089ad7b..002990e8f1 100644 --- a/cpp/test/sg/genetic/evolution_test.cu +++ b/cpp/test/sg/genetic/evolution_test.cu @@ -45,7 +45,8 @@ class GeneticEvolutionTest : public ::testing::Test { d_test(0, cudaStream_t(0)), d_testlab(0, cudaStream_t(0)), d_trainwts(0, 
cudaStream_t(0)), - d_testwts(0, cudaStream_t(0)) + d_testwts(0, cudaStream_t(0)), + stream(handle.get_stream()) { } @@ -112,6 +113,7 @@ class GeneticEvolutionTest : public ::testing::Test { } raft::handle_t handle; + cudaStream_t stream; param hyper_params; // Some mini-dataset constants diff --git a/cpp/test/sg/genetic/program_test.cu b/cpp/test/sg/genetic/program_test.cu index ace14ebd3d..64de9e3cc6 100644 --- a/cpp/test/sg/genetic/program_test.cu +++ b/cpp/test/sg/genetic/program_test.cu @@ -42,7 +42,8 @@ class GeneticProgramTest : public ::testing::Test { dx2(0, cudaStream_t(0)), dy2(0, cudaStream_t(0)), dw2(0, cudaStream_t(0)), - dyp2(0, cudaStream_t(0)) + dyp2(0, cudaStream_t(0)), + stream(handle.get_stream()) { } @@ -155,6 +156,7 @@ class GeneticProgramTest : public ::testing::Test { } raft::handle_t handle; + cudaStream_t stream; const int n_cols = 3; const int n_progs = 2; const int n_samples = 25; diff --git a/cpp/test/sg/linear_svm_test.cu b/cpp/test/sg/linear_svm_test.cu index b9cabfd4c8..9fe70eb881 100644 --- a/cpp/test/sg/linear_svm_test.cu +++ b/cpp/test/sg/linear_svm_test.cu @@ -48,14 +48,14 @@ template struct LinearSVMTest : public ::testing::TestWithParam { const LinearSVMTestParams params; const raft::handle_t handle; - rmm::cuda_stream_view stream; + cudaStream_t stream; LinearSVMTest() : testing::TestWithParam(), params( ParamsReader::read(::testing::TestWithParam::GetParam())), - handle(8), - stream(handle.get_stream_view()) + handle(rmm::cuda_stream_per_thread, std::make_shared(8)), + stream(handle.get_stream()) { } diff --git a/python/cuml/svm/linear.pyx b/python/cuml/svm/linear.pyx index 664914b4bd..3934985dc7 100644 --- a/python/cuml/svm/linear.pyx +++ b/python/cuml/svm/linear.pyx @@ -26,14 +26,18 @@ from cuml.internals.base_helpers import BaseMetaClass from cuml.common.array_descriptor import CumlArrayDescriptor from cuml.common.array import CumlArray from cuml.common.base import Base -from cuml.raft.common.handle cimport handle_t, _Stream +from cuml.raft.common.handle cimport handle_t from cuml.common import input_to_cuml_array from libc.stdint cimport uintptr_t from libcpp cimport bool as cppbool cimport rmm._lib.lib as rmm +from rmm._lib.cuda_stream_view cimport cuda_stream_view __all__ = ['LinearSVM', 'LinearSVM_defaults'] +cdef extern from * nogil: + ctypedef void* _Stream "cudaStream_t" + cdef extern from "cuml/svm/linear.hpp" namespace "ML::SVM": cdef enum Penalty "ML::SVM::LinearSVMParams::Penalty": @@ -221,7 +225,7 @@ cdef class LinearSVMWrapper: self, target: CumlArray, source: CumlArray, synchronize: bool = True): - cdef _Stream stream = self.handle.get_stream() + cdef cuda_stream_view stream = self.handle.get_stream() if source.shape != target.shape: raise AttributeError( f"Expected an array of shape {target.shape}, " @@ -235,9 +239,9 @@ cdef class LinearSVMWrapper: source.ptr, (source.nbytes), rmm.cudaMemcpyDeviceToDevice, - stream) + <_Stream> stream) if synchronize: - rmm.cudaStreamSynchronize(stream) + self.handle.sync_stream() def __cinit__( self, @@ -277,7 +281,7 @@ cdef class LinearSVMWrapper: " estimator with fit_intercept enabled") self.dtype = X.dtype if do_training else coefs.dtype - cdef _Stream stream = self.handle.get_stream() + cdef cuda_stream_view stream = self.handle.get_stream() nClasses = 0 nCols = 0 From 131bd49ee788dc273784a319a50d3ad63b286aa3 Mon Sep 17 00:00:00 2001 From: divyegala Date: Thu, 9 Dec 2021 13:03:07 -0800 Subject: [PATCH 15/18] trying to fix style --- python/cuml/svm/linear.pyx | 1 + 1 file changed, 1 insertion(+) 
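The cimport being shuffled here is the one that types the handle's stream as `rmm::cuda_stream_view`; the wrapper drops to a raw `cudaStream_t` only at C-API boundaries such as `cudaMemcpyAsync`, then synchronizes via the handle. A small C++ analogue of that boundary — the helper name is hypothetical, and the `cuda_stream_view` calls are assumed from rmm's public interface:

```cpp
#include <cstddef>

#include <cuda_runtime_api.h>
#include <raft/handle.hpp>           // assumed header for raft::handle_t
#include <rmm/cuda_stream_view.hpp>

// Hypothetical helper: device-to-device copy ordered on the handle's stream.
void copy_on_handle_stream(const raft::handle_t& handle,
                           void* dst,
                           const void* src,
                           std::size_t bytes)
{
  rmm::cuda_stream_view stream = handle.get_stream();
  // The typed view drops to a raw stream only where the CUDA C API requires one.
  cudaMemcpyAsync(dst, src, bytes, cudaMemcpyDeviceToDevice, stream.value());
  stream.synchronize();  // or handle.sync_stream(), as the Cython wrapper does
}
```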
diff --git a/python/cuml/svm/linear.pyx b/python/cuml/svm/linear.pyx index 3934985dc7..0fb6efb29d 100644 --- a/python/cuml/svm/linear.pyx +++ b/python/cuml/svm/linear.pyx @@ -31,6 +31,7 @@ from cuml.common import input_to_cuml_array from libc.stdint cimport uintptr_t from libcpp cimport bool as cppbool cimport rmm._lib.lib as rmm + from rmm._lib.cuda_stream_view cimport cuda_stream_view __all__ = ['LinearSVM', 'LinearSVM_defaults'] From 8054834c157756a3a754a4389f0028a9ad5acba0 Mon Sep 17 00:00:00 2001 From: divyegala Date: Thu, 9 Dec 2021 13:18:23 -0800 Subject: [PATCH 16/18] trying style check again --- python/cuml/svm/linear.pyx | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/cuml/svm/linear.pyx b/python/cuml/svm/linear.pyx index 0fb6efb29d..df0e425ec1 100644 --- a/python/cuml/svm/linear.pyx +++ b/python/cuml/svm/linear.pyx @@ -20,6 +20,9 @@ import inspect import typing import numpy as np import cuml + +from rmm._lib.cuda_stream_view cimport cuda_stream_view + from collections import OrderedDict from cython.operator cimport dereference as deref from cuml.internals.base_helpers import BaseMetaClass @@ -32,8 +35,6 @@ from libc.stdint cimport uintptr_t from libcpp cimport bool as cppbool cimport rmm._lib.lib as rmm -from rmm._lib.cuda_stream_view cimport cuda_stream_view - __all__ = ['LinearSVM', 'LinearSVM_defaults'] cdef extern from * nogil: From 9de45404bbf7a016220b1c7730b190a9be63d6ff Mon Sep 17 00:00:00 2001 From: divyegala Date: Mon, 13 Dec 2021 10:11:51 -0800 Subject: [PATCH 17/18] fix failing sil score gtest --- cpp/test/prims/silhouette_score.cu | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/cpp/test/prims/silhouette_score.cu b/cpp/test/prims/silhouette_score.cu index a6ffb41624..3eb43a9200 100644 --- a/cpp/test/prims/silhouette_score.cu +++ b/cpp/test/prims/silhouette_score.cu @@ -42,7 +42,12 @@ struct silhouetteScoreParam { template class silhouetteScoreTest : public ::testing::TestWithParam { protected: - silhouetteScoreTest() : d_X(0, stream), sampleSilScore(0, stream), d_labels(0, stream) {} + silhouetteScoreTest() + : d_X(0, handle.get_stream()), + sampleSilScore(0, handle.get_stream()), + d_labels(0, handle.get_stream()) + { + } void host_silhouette_score() { @@ -58,7 +63,7 @@ class silhouetteScoreTest : public ::testing::TestWithParam Date: Mon, 13 Dec 2021 14:24:12 -0800 Subject: [PATCH 18/18] remove testing label and revert raft back to main --- ci/cpu/build.sh | 8 ++++---- ci/gpu/build.sh | 6 +++--- cpp/cmake/thirdparty/get_raft.cmake | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh index 0972499327..b923795dc0 100755 --- a/ci/cpu/build.sh +++ b/ci/cpu/build.sh @@ -74,12 +74,12 @@ BUILD_CUML=1 if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then if [ "$BUILD_LIBCUML" == '1' -o "$BUILD_CUML" == '1' ]; then gpuci_logger "Build conda pkg for libcuml" - gpuci_conda_retry build --no-build-id --croot ${CONDA_BLD_DIR} -c rapidsai-nightly/label/testing conda/recipes/libcuml + gpuci_conda_retry build --no-build-id --croot ${CONDA_BLD_DIR} conda/recipes/libcuml fi else if [ "$BUILD_LIBCUML" == '1' ]; then gpuci_logger "PROJECT FLASH: Build conda pkg for libcuml" - gpuci_conda_retry build --no-build-id --croot ${CONDA_BLD_DIR} -c rapidsai-nightly/label/testing conda/recipes/libcuml --dirty --no-remove-work-dir + gpuci_conda_retry build --no-build-id --croot ${CONDA_BLD_DIR} conda/recipes/libcuml --dirty --no-remove-work-dir mkdir -p 
${CONDA_BLD_DIR}/libcuml/work cp -r ${CONDA_BLD_DIR}/work/* ${CONDA_BLD_DIR}/libcuml/work rm -rf ${CONDA_BLD_DIR}/work @@ -89,10 +89,10 @@ fi if [ "$BUILD_CUML" == '1' ]; then if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then gpuci_logger "Build conda pkg for cuml" - gpuci_conda_retry build --croot ${CONDA_BLD_DIR} -c rapidsai-nightly/label/testing conda/recipes/cuml --python=${PYTHON} + gpuci_conda_retry build --croot ${CONDA_BLD_DIR} conda/recipes/cuml --python=${PYTHON} else gpuci_logger "PROJECT FLASH: Build conda pkg for cuml" - gpuci_conda_retry build --no-build-id --croot ${CONDA_BLD_DIR} -c rapidsai-nightly/label/testing conda/recipes/cuml -c $CONDA_BLD_DIR --dirty --no-remove-work-dir --python=${PYTHON} + gpuci_conda_retry build --no-build-id --croot ${CONDA_BLD_DIR} conda/recipes/cuml -c $CONDA_BLD_DIR --dirty --no-remove-work-dir --python=${PYTHON} mkdir -p ${CONDA_BLD_DIR}/cuml/work cp -r ${CONDA_BLD_DIR}/work/* ${CONDA_BLD_DIR}/cuml/work rm -rf ${CONDA_BLD_DIR}/work diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 4fb22700b3..505ef4c6ca 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -46,7 +46,7 @@ gpuci_logger "Activate conda env" conda activate rapids gpuci_logger "Install dependencies" -gpuci_mamba_retry install -c conda-forge -c rapidsai -c rapidsai-nightly/label/testing -c rapidsai-nightly -c nvidia \ +gpuci_mamba_retry install -c conda-forge -c rapidsai -c rapidsai-nightly -c nvidia \ "cudatoolkit=${CUDA_REL}" \ "cudf=${MINOR_VERSION}" \ "rmm=${MINOR_VERSION}" \ @@ -190,7 +190,7 @@ else CONDA_FILE=`basename "$CONDA_FILE" .tar.bz2` #get filename without extension CONDA_FILE=${CONDA_FILE//-/=} #convert to conda install gpuci_logger "Installing $CONDA_FILE" - gpuci_mamba_retry install -c rapidsai-nightly/label/testing -c ${CONDA_ARTIFACT_PATH} "$CONDA_FILE" + gpuci_mamba_retry install -c ${CONDA_ARTIFACT_PATH} "$CONDA_FILE" # FIXME: Project FLASH only builds for python version 3.7 which is the one used in # the CUDA 11.0 job, need to change all versions to project flash @@ -200,7 +200,7 @@ else CONDA_FILE=`basename "$CONDA_FILE" .tar.bz2` #get filename without extension CONDA_FILE=${CONDA_FILE//-/=} #convert to conda install echo "Installing $CONDA_FILE" - gpuci_mamba_retry install -c rapidsai-nightly/label/testing -c ${CONDA_ARTIFACT_PATH} "$CONDA_FILE" + gpuci_mamba_retry install -c ${CONDA_ARTIFACT_PATH} "$CONDA_FILE" else gpuci_logger "Building cuml python in gpu job" diff --git a/cpp/cmake/thirdparty/get_raft.cmake b/cpp/cmake/thirdparty/get_raft.cmake index b874fd7093..50845ec4b8 100644 --- a/cpp/cmake/thirdparty/get_raft.cmake +++ b/cpp/cmake/thirdparty/get_raft.cmake @@ -56,6 +56,6 @@ set(CUML_BRANCH_VERSION_raft "${CUML_VERSION_MAJOR}.${CUML_VERSION_MINOR}") # To use a different RAFT locally, set the CMake variable # CPM_raft_SOURCE=/path/to/local/raft find_and_configure_raft(VERSION ${CUML_MIN_VERSION_raft} - FORK divyegala - PINNED_TAG imp-21.10-handle_stream + FORK rapidsai + PINNED_TAG branch-${CUML_BRANCH_VERSION_raft} )
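With the pin back on `rapidsai/branch-${CUML_BRANCH_VERSION_raft}`, the lasting change of the series is the handle/stream idiom rather than any RAFT fork. A consolidated sketch of the multi-stream pattern the `linear.cu` hunks converge on — the driver function and loop body are illustrative; the accessors are the ones used in those hunks:

```cpp
#include <raft/handle.hpp>  // assumed header for raft::handle_t

// Hypothetical one-vs-rest driver: one pool stream per class.
void fit_one_vs_rest(const raft::handle_t& handle, int n_classes)
{
  // Parallelism comes from the pool the caller attached at handle construction,
  // not from internal streams owned by the handle.
  const int n_streams = n_classes > 1 ? static_cast<int>(handle.get_stream_pool_size()) : 1;

  for (int class_i = 0; class_i < n_classes; class_i++) {
    // Each worker is a lightweight handle bound to one stream from the pool.
    raft::handle_t worker{handle.get_next_usable_stream(class_i)};
    // ... enqueue per-class work on worker.get_stream() ...
  }

  // Join every pool stream before reading combined results.
  if (n_streams > 1) handle.sync_stream_pool();
}
```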