diff --git a/build.sh b/build.sh index b100095f8d..fb077e50ec 100755 --- a/build.sh +++ b/build.sh @@ -46,6 +46,7 @@ HELP="$0 [ ...] [ ...] --codecov - Enable code coverage support by compiling with Cython linetracing and profiling enabled (WARNING: Impacts performance) --ccache - Use ccache to cache previous compilations + --nocloneraft - CMake will clone RAFT even if it is in the environment, use this flag to disable that behavior default action (no args) is to build and install 'libcuml', 'cuml', and 'prims' targets only for the detected GPU arch @@ -77,6 +78,7 @@ BUILD_CUML_TESTS=ON BUILD_CUML_MG_TESTS=OFF BUILD_STATIC_FAISS=OFF CMAKE_LOG_LEVEL=WARNING +DISABLE_FORCE_CLONE_RAFT=OFF # Set defaults for vars that may not have been defined externally # FIXME: if INSTALL_PREFIX is not set, check PREFIX, then check @@ -129,6 +131,7 @@ LONG_ARGUMENT_LIST=( "codecov" "ccache" "nolibcumltest" + "nocloneraft" ) # Short arguments @@ -188,6 +191,9 @@ while true; do --nolibcumltest ) BUILD_CUML_TESTS=OFF ;; + --nocloneraft ) + DISABLE_FORCE_CLONE_RAFT=ON + ;; --) shift break @@ -239,6 +245,7 @@ if completeBuild || hasArg libcuml || hasArg prims || hasArg bench || hasArg pri -DBUILD_CUML_TESTS=${BUILD_CUML_TESTS} \ -DBUILD_CUML_MPI_COMMS=${BUILD_CUML_MG_TESTS} \ -DBUILD_CUML_MG_TESTS=${BUILD_CUML_MG_TESTS} \ + -DDISABLE_FORCE_CLONE_RAFT=${DISABLE_FORCE_CLONE_RAFT} \ -DNVTX=${NVTX} \ -DUSE_CCACHE=${CCACHE} \ -DDISABLE_DEPRECATION_WARNING=${BUILD_DISABLE_DEPRECATION_WARNING} \ diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index b91af8a62e..2cbbd29af8 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -57,6 +57,7 @@ option(CUDA_ENABLE_KERNEL_INFO "Enable kernel resource usage info" OFF) option(CUDA_ENABLE_LINE_INFO "Enable lineinfo in nvcc" OFF) option(DETECT_CONDA_ENV "Enable detection of conda environment for dependencies" ON) option(DISABLE_DEPRECATION_WARNINGS "Disable depreaction warnings " ON) +option(DISABLE_FORCE_CLONE_RAFT "By default, CPM will clone RAFT even if it's already in the environment. Set to disable that behavior." OFF) option(DISABLE_OPENMP "Disable OpenMP" OFF) option(ENABLE_CUMLPRIMS_MG "Enable algorithms that use libcumlprims_mg" ON) option(NVTX "Enable nvtx markers" OFF) diff --git a/cpp/cmake/thirdparty/get_raft.cmake b/cpp/cmake/thirdparty/get_raft.cmake index abef8830d4..50845ec4b8 100644 --- a/cpp/cmake/thirdparty/get_raft.cmake +++ b/cpp/cmake/thirdparty/get_raft.cmake @@ -20,6 +20,11 @@ function(find_and_configure_raft) cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} ) + if(DEFINED CPM_raft_SOURCE OR NOT DISABLE_FORCE_CLONE_RAFT) + set(CPM_DL_ALL_CACHE ${CPM_DOWNLOAD_ALL}) + set(CPM_DOWNLOAD_ALL ON) + endif() + rapids_cpm_find(raft ${PKG_VERSION} GLOBAL_TARGETS raft::raft BUILD_EXPORT_SET cuml-exports @@ -32,7 +37,15 @@ function(find_and_configure_raft) "BUILD_TESTS OFF" ) - message(VERBOSE "CUML: Using RAFT located in ${raft_SOURCE_DIR}") + if(raft_ADDED) + message(VERBOSE "CUML: Using RAFT located in ${raft_SOURCE_DIR}") + else() + message(VERBOSE "CUML: Using RAFT located in ${raft_DIR}") + endif() + + if(DEFINED CPM_raft_SOURCE OR NOT DISABLE_FORCE_CLONE_RAFT) + set(CPM_DOWNLOAD_ALL ${CPM_DL_ALL_CACHE}) + endif() endfunction() diff --git a/cpp/src/fil/fil.cu b/cpp/src/fil/fil.cu index 72850eb4f5..40a492bea6 100644 --- a/cpp/src/fil/fil.cu +++ b/cpp/src/fil/fil.cu @@ -70,7 +70,7 @@ cat_sets_owner::cat_sets_owner(const std::vector& cf) max_matching.push_back(cnt.max_matching); bits_size += categorical_sets::sizeof_mask_from_max_matching(cnt.max_matching) * cnt.n_nodes; - int fid = max_matching.size(); + auto fid = max_matching.size(); RAFT_EXPECTS( cnt.max_matching >= -1, "@fid %zu: max_matching invalid (%d)", fid, cnt.max_matching); RAFT_EXPECTS(cnt.n_nodes >= 0, "@fid %zu: n_nodes invalid (%d)", fid, cnt.n_nodes); diff --git a/cpp/src_prims/linalg/lstsq.cuh b/cpp/src_prims/linalg/lstsq.cuh index 3352e956eb..abc143003e 100644 --- a/cpp/src_prims/linalg/lstsq.cuh +++ b/cpp/src_prims/linalg/lstsq.cuh @@ -56,7 +56,7 @@ struct DeviceEvent { } ~DeviceEvent() { - if (e != nullptr) CUDA_CHECK(cudaEventDestroy(e)); + if (e != nullptr) CUDA_CHECK_NO_THROW(cudaEventDestroy(e)); } operator cudaEvent_t() const { return e; } void record(cudaStream_t stream) diff --git a/python/cuml/metrics/trustworthiness.pyx b/python/cuml/metrics/trustworthiness.pyx index 9bb907f81d..d05be9ec4d 100644 --- a/python/cuml/metrics/trustworthiness.pyx +++ b/python/cuml/metrics/trustworthiness.pyx @@ -88,6 +88,9 @@ def trustworthiness(X, X_embedded, handle=None, n_neighbors=5, warnings.warn("Parameter should_downcast is deprecated, use " "convert_dtype instead. ") + if n_neighbors > X.shape[0]: + raise ValueError("n_neighbors must be <= the number of rows.") + handle = cuml.raft.common.handle.Handle() if handle is None else handle cdef uintptr_t d_X_ptr diff --git a/python/cuml/test/test_benchmark.py b/python/cuml/test/test_benchmark.py index 8350610565..3e1fbe1d85 100644 --- a/python/cuml/test/test_benchmark.py +++ b/python/cuml/test/test_benchmark.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -170,8 +170,9 @@ def predict(self, X): # Only test a few algorithms (which collectively span several types) # to reduce runtime burden -@pytest.mark.parametrize('algo_name', ['UMAP-Supervised', - 'DBSCAN', +# skipping UMAP-Supervised due to issue +# https://github.com/rapidsai/cuml/issues/4243 +@pytest.mark.parametrize('algo_name', ['DBSCAN', 'LogisticRegression', 'ElasticNet', 'FIL']) @@ -183,7 +184,7 @@ def test_real_algos_runner(algo_name): pytest.xfail() runner = AccuracyComparisonRunner( - [20], [5], dataset_name='classification', test_fraction=0.20 + [50], [5], dataset_name='classification', test_fraction=0.20 ) results = runner.run(pair)[0] print(results) diff --git a/python/cuml/test/test_nearest_neighbors.py b/python/cuml/test/test_nearest_neighbors.py index edc75533bd..7cfcdc46c2 100644 --- a/python/cuml/test/test_nearest_neighbors.py +++ b/python/cuml/test/test_nearest_neighbors.py @@ -324,7 +324,7 @@ def test_return_dists(): @pytest.mark.parametrize('nrows', [unit_param(500), quality_param(5000), stress_param(70000)]) @pytest.mark.parametrize('n_feats', [unit_param(3), stress_param(1000)]) -@pytest.mark.parametrize('k', [unit_param(3), unit_param(35), +@pytest.mark.parametrize('k', [unit_param(3), stress_param(50)]) @pytest.mark.parametrize("metric", valid_metrics()) def test_knn_separate_index_search(input_type, nrows, n_feats, k, metric): @@ -468,7 +468,7 @@ def test_nn_downcast_fails(input_type, nrows, n_feats): ("ndarray", "connectivity", "cupy", False), ("ndarray", "distance", "numpy", False), ]) -@pytest.mark.parametrize('nrows', [unit_param(10), stress_param(1000)]) +@pytest.mark.parametrize('nrows', [unit_param(100), stress_param(1000)]) @pytest.mark.parametrize('n_feats', [unit_param(5), stress_param(100)]) @pytest.mark.parametrize("p", [2, 5]) @pytest.mark.parametrize('k', [unit_param(3), unit_param(35), diff --git a/python/cuml/test/test_trustworthiness.py b/python/cuml/test/test_trustworthiness.py index 7a2a117067..17a623bd3f 100644 --- a/python/cuml/test/test_trustworthiness.py +++ b/python/cuml/test/test_trustworthiness.py @@ -48,3 +48,11 @@ def test_trustworthiness(input_type, n_samples, n_features, n_components, cu_score = cuml_trustworthiness(X, X_embedded, batch_size=batch_size) assert abs(cu_score - sk_score) <= 1e-3 + + +def test_trustworthiness_invalid_input(): + X, y = make_blobs(n_samples=10, centers=1, + n_features=2, random_state=32) + + with pytest.raises(ValueError): + cuml_trustworthiness(X, X, n_neighbors=50)