From 074c9519296ec76493ca0e68b3866a68419fdf09 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Tue, 21 Sep 2021 11:42:55 -0500 Subject: [PATCH 01/13] FIX add option to clone RAFT even if it is in the envirionment and use it in CI --- build.sh | 7 +++++++ cpp/CMakeLists.txt | 1 + cpp/cmake/thirdparty/get_raft.cmake | 15 ++++++++++++++- 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/build.sh b/build.sh index b100095f8d..9ca1212867 100755 --- a/build.sh +++ b/build.sh @@ -46,6 +46,7 @@ HELP="$0 [ ...] [ ...] --codecov - Enable code coverage support by compiling with Cython linetracing and profiling enabled (WARNING: Impacts performance) --ccache - Use ccache to cache previous compilations + --nocloneraft - CMake will clone RAFT even if it is in the environment, use this flag to disable that behavior default action (no args) is to build and install 'libcuml', 'cuml', and 'prims' targets only for the detected GPU arch @@ -77,6 +78,7 @@ BUILD_CUML_TESTS=ON BUILD_CUML_MG_TESTS=OFF BUILD_STATIC_FAISS=OFF CMAKE_LOG_LEVEL=WARNING +FORCE_CLONE_RAFT=ON # Set defaults for vars that may not have been defined externally # FIXME: if INSTALL_PREFIX is not set, check PREFIX, then check @@ -129,6 +131,7 @@ LONG_ARGUMENT_LIST=( "codecov" "ccache" "nolibcumltest" + "nocloneraft" ) # Short arguments @@ -188,6 +191,9 @@ while true; do --nolibcumltest ) BUILD_CUML_TESTS=OFF ;; + --nocloneraft ) + FORCE_CLONE_RAFT=OFF + ;; --) shift break @@ -239,6 +245,7 @@ if completeBuild || hasArg libcuml || hasArg prims || hasArg bench || hasArg pri -DBUILD_CUML_TESTS=${BUILD_CUML_TESTS} \ -DBUILD_CUML_MPI_COMMS=${BUILD_CUML_MG_TESTS} \ -DBUILD_CUML_MG_TESTS=${BUILD_CUML_MG_TESTS} \ + -DFORCE_CLONE_RAFT=${FORCE_CLONE_RAFT} \ -DNVTX=${NVTX} \ -DUSE_CCACHE=${CCACHE} \ -DDISABLE_DEPRECATION_WARNING=${BUILD_DISABLE_DEPRECATION_WARNING} \ diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index b91af8a62e..2e93559dba 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -59,6 +59,7 @@ option(DETECT_CONDA_ENV "Enable detection of conda environment for dependencies" option(DISABLE_DEPRECATION_WARNINGS "Disable depreaction warnings " ON) option(DISABLE_OPENMP "Disable OpenMP" OFF) option(ENABLE_CUMLPRIMS_MG "Enable algorithms that use libcumlprims_mg" ON) +option(FORCE_CLONE_RAFT "Force CPM to clone RAFT even if it is installed already" OFF) option(NVTX "Enable nvtx markers" OFF) option(SINGLEGPU "Disable all mnmg components and comms libraries" OFF) option(USE_CCACHE "Cache build artifacts with ccache" OFF) diff --git a/cpp/cmake/thirdparty/get_raft.cmake b/cpp/cmake/thirdparty/get_raft.cmake index abef8830d4..a9dfceeb04 100644 --- a/cpp/cmake/thirdparty/get_raft.cmake +++ b/cpp/cmake/thirdparty/get_raft.cmake @@ -20,6 +20,11 @@ function(find_and_configure_raft) cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} ) + if(DEFINED CPM_raft_SOURCE OR FORCE_CLONE_RAFT) + set(CPM_DL_ALL_CACHE ${CPM_DOWNLOAD_ALL}) + set(CPM_DOWNLOAD_ALL ON) + endif() + rapids_cpm_find(raft ${PKG_VERSION} GLOBAL_TARGETS raft::raft BUILD_EXPORT_SET cuml-exports @@ -32,7 +37,15 @@ function(find_and_configure_raft) "BUILD_TESTS OFF" ) - message(VERBOSE "CUML: Using RAFT located in ${raft_SOURCE_DIR}") + if(raft_ADDED) + message(VERBOSE "CUML: Using RAFT located in ${raft_SOURCE_DIR}") + else() + message(VERBOSE "CUML: Using RAFT located in ${raft_DIR}") + endif() + + if(DEFINED CPM_raft_SOURCE OR FORCE_CLONE_RAFT) + set(CPM_DOWNLOAD_ALL ${CPM_DL_ALL_CACHE}) + endif() endfunction() From 65d8d14bfd4bcb1d2e5c283027cca75e8f5dd778 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Tue, 21 Sep 2021 15:36:03 -0500 Subject: [PATCH 02/13] FIX Remove warnings --- cpp/src/fil/fil.cu | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/fil/fil.cu b/cpp/src/fil/fil.cu index 72850eb4f5..202335bb47 100644 --- a/cpp/src/fil/fil.cu +++ b/cpp/src/fil/fil.cu @@ -72,10 +72,10 @@ cat_sets_owner::cat_sets_owner(const std::vector& cf) int fid = max_matching.size(); RAFT_EXPECTS( - cnt.max_matching >= -1, "@fid %zu: max_matching invalid (%d)", fid, cnt.max_matching); - RAFT_EXPECTS(cnt.n_nodes >= 0, "@fid %zu: n_nodes invalid (%d)", fid, cnt.n_nodes); + cnt.max_matching >= -1, "@fid %d: max_matching invalid (%d)", fid, cnt.max_matching); + RAFT_EXPECTS(cnt.n_nodes >= 0, "@fid %d: n_nodes invalid (%d)", fid, cnt.n_nodes); RAFT_EXPECTS(bits_size <= INT_MAX, - "@fid %zu: cannot store %zu categories given `int` offsets", + "@fid %d: cannot store %zu categories given `int` offsets", fid, bits_size); } From 07f2196be2b97893b8d74e97a35362acfa94a129 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Mon, 27 Sep 2021 16:31:15 -0500 Subject: [PATCH 03/13] ENH Rename variable to indicate preferred behavior is to clone raft --- build.sh | 6 +++--- cpp/CMakeLists.txt | 2 +- cpp/cmake/thirdparty/get_raft.cmake | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/build.sh b/build.sh index 9ca1212867..fb077e50ec 100755 --- a/build.sh +++ b/build.sh @@ -78,7 +78,7 @@ BUILD_CUML_TESTS=ON BUILD_CUML_MG_TESTS=OFF BUILD_STATIC_FAISS=OFF CMAKE_LOG_LEVEL=WARNING -FORCE_CLONE_RAFT=ON +DISABLE_FORCE_CLONE_RAFT=OFF # Set defaults for vars that may not have been defined externally # FIXME: if INSTALL_PREFIX is not set, check PREFIX, then check @@ -192,7 +192,7 @@ while true; do BUILD_CUML_TESTS=OFF ;; --nocloneraft ) - FORCE_CLONE_RAFT=OFF + DISABLE_FORCE_CLONE_RAFT=ON ;; --) shift @@ -245,7 +245,7 @@ if completeBuild || hasArg libcuml || hasArg prims || hasArg bench || hasArg pri -DBUILD_CUML_TESTS=${BUILD_CUML_TESTS} \ -DBUILD_CUML_MPI_COMMS=${BUILD_CUML_MG_TESTS} \ -DBUILD_CUML_MG_TESTS=${BUILD_CUML_MG_TESTS} \ - -DFORCE_CLONE_RAFT=${FORCE_CLONE_RAFT} \ + -DDISABLE_FORCE_CLONE_RAFT=${DISABLE_FORCE_CLONE_RAFT} \ -DNVTX=${NVTX} \ -DUSE_CCACHE=${CCACHE} \ -DDISABLE_DEPRECATION_WARNING=${BUILD_DISABLE_DEPRECATION_WARNING} \ diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 2e93559dba..2cbbd29af8 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -57,9 +57,9 @@ option(CUDA_ENABLE_KERNEL_INFO "Enable kernel resource usage info" OFF) option(CUDA_ENABLE_LINE_INFO "Enable lineinfo in nvcc" OFF) option(DETECT_CONDA_ENV "Enable detection of conda environment for dependencies" ON) option(DISABLE_DEPRECATION_WARNINGS "Disable depreaction warnings " ON) +option(DISABLE_FORCE_CLONE_RAFT "By default, CPM will clone RAFT even if it's already in the environment. Set to disable that behavior." OFF) option(DISABLE_OPENMP "Disable OpenMP" OFF) option(ENABLE_CUMLPRIMS_MG "Enable algorithms that use libcumlprims_mg" ON) -option(FORCE_CLONE_RAFT "Force CPM to clone RAFT even if it is installed already" OFF) option(NVTX "Enable nvtx markers" OFF) option(SINGLEGPU "Disable all mnmg components and comms libraries" OFF) option(USE_CCACHE "Cache build artifacts with ccache" OFF) diff --git a/cpp/cmake/thirdparty/get_raft.cmake b/cpp/cmake/thirdparty/get_raft.cmake index a9dfceeb04..50845ec4b8 100644 --- a/cpp/cmake/thirdparty/get_raft.cmake +++ b/cpp/cmake/thirdparty/get_raft.cmake @@ -20,7 +20,7 @@ function(find_and_configure_raft) cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} ) - if(DEFINED CPM_raft_SOURCE OR FORCE_CLONE_RAFT) + if(DEFINED CPM_raft_SOURCE OR NOT DISABLE_FORCE_CLONE_RAFT) set(CPM_DL_ALL_CACHE ${CPM_DOWNLOAD_ALL}) set(CPM_DOWNLOAD_ALL ON) endif() @@ -43,7 +43,7 @@ function(find_and_configure_raft) message(VERBOSE "CUML: Using RAFT located in ${raft_DIR}") endif() - if(DEFINED CPM_raft_SOURCE OR FORCE_CLONE_RAFT) + if(DEFINED CPM_raft_SOURCE OR NOT DISABLE_FORCE_CLONE_RAFT) set(CPM_DOWNLOAD_ALL ${CPM_DL_ALL_CACHE}) endif() From 7029a17f36cab7b76d9a7297184a0a35c767ac45 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Mon, 27 Sep 2021 17:44:59 -0500 Subject: [PATCH 04/13] FIX More warnings that made it in --- cpp/src/fil/fil.cu | 8 ++++---- cpp/src_prims/linalg/lstsq.cuh | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cpp/src/fil/fil.cu b/cpp/src/fil/fil.cu index 202335bb47..40a492bea6 100644 --- a/cpp/src/fil/fil.cu +++ b/cpp/src/fil/fil.cu @@ -70,12 +70,12 @@ cat_sets_owner::cat_sets_owner(const std::vector& cf) max_matching.push_back(cnt.max_matching); bits_size += categorical_sets::sizeof_mask_from_max_matching(cnt.max_matching) * cnt.n_nodes; - int fid = max_matching.size(); + auto fid = max_matching.size(); RAFT_EXPECTS( - cnt.max_matching >= -1, "@fid %d: max_matching invalid (%d)", fid, cnt.max_matching); - RAFT_EXPECTS(cnt.n_nodes >= 0, "@fid %d: n_nodes invalid (%d)", fid, cnt.n_nodes); + cnt.max_matching >= -1, "@fid %zu: max_matching invalid (%d)", fid, cnt.max_matching); + RAFT_EXPECTS(cnt.n_nodes >= 0, "@fid %zu: n_nodes invalid (%d)", fid, cnt.n_nodes); RAFT_EXPECTS(bits_size <= INT_MAX, - "@fid %d: cannot store %zu categories given `int` offsets", + "@fid %zu: cannot store %zu categories given `int` offsets", fid, bits_size); } diff --git a/cpp/src_prims/linalg/lstsq.cuh b/cpp/src_prims/linalg/lstsq.cuh index 3352e956eb..abc143003e 100644 --- a/cpp/src_prims/linalg/lstsq.cuh +++ b/cpp/src_prims/linalg/lstsq.cuh @@ -56,7 +56,7 @@ struct DeviceEvent { } ~DeviceEvent() { - if (e != nullptr) CUDA_CHECK(cudaEventDestroy(e)); + if (e != nullptr) CUDA_CHECK_NO_THROW(cudaEventDestroy(e)); } operator cudaEvent_t() const { return e; } void record(cudaStream_t stream) From 586b29be73070a73c011ca4960d04ead2f0d4e73 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Mon, 27 Sep 2021 21:02:39 -0500 Subject: [PATCH 05/13] DBG skip test_benchmark.py::test_real_algos_runner[UMAP-Supervised] --- python/cuml/test/test_benchmark.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/cuml/test/test_benchmark.py b/python/cuml/test/test_benchmark.py index 1a5b57f902..d5748cd44e 100644 --- a/python/cuml/test/test_benchmark.py +++ b/python/cuml/test/test_benchmark.py @@ -170,8 +170,9 @@ def predict(self, X): # Only test a few algorithms (which collectively span several types) # to reduce runtime burden -@pytest.mark.parametrize('algo_name', ['UMAP-Supervised', - 'DBSCAN', +# skipping UMAP-Supervised due to issue +# https://github.com/rapidsai/cuml/issues/4243 +@pytest.mark.parametrize('algo_name', ['DBSCAN', 'LogisticRegression', 'ElasticNet', 'FIL']) From a0826909743f7246bbb6ebbbfde431bfee5e9662 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Mon, 27 Sep 2021 21:17:51 -0500 Subject: [PATCH 06/13] FIX Copyright years --- python/cuml/test/test_benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cuml/test/test_benchmark.py b/python/cuml/test/test_benchmark.py index d5748cd44e..43d6cdef57 100644 --- a/python/cuml/test/test_benchmark.py +++ b/python/cuml/test/test_benchmark.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From e644dbce22c8e22860ef93143f252012e625d5c6 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Tue, 28 Sep 2021 10:50:03 -0400 Subject: [PATCH 07/13] Adding assertions for high k to nearest neighbors tests. --- python/cuml/test/test_nearest_neighbors.py | 40 ++++++++++++++++++++-- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/python/cuml/test/test_nearest_neighbors.py b/python/cuml/test/test_nearest_neighbors.py index 66c7fb8f53..4cd28925b2 100644 --- a/python/cuml/test/test_nearest_neighbors.py +++ b/python/cuml/test/test_nearest_neighbors.py @@ -323,7 +323,7 @@ def test_return_dists(): @pytest.mark.parametrize('nrows', [unit_param(500), quality_param(5000), stress_param(70000)]) @pytest.mark.parametrize('n_feats', [unit_param(3), stress_param(1000)]) -@pytest.mark.parametrize('k', [unit_param(3), stress_param(50)]) +@pytest.mark.parametrize('k', [unit_param(3), unit_param(35), stress_param(50)]) @pytest.mark.parametrize("metric", valid_metrics()) def test_knn_separate_index_search(input_type, nrows, n_feats, k, metric): X, _ = make_blobs(n_samples=nrows, @@ -378,7 +378,7 @@ def test_knn_separate_index_search(input_type, nrows, n_feats, k, metric): @pytest.mark.parametrize('input_type', ['dataframe', 'ndarray']) @pytest.mark.parametrize('nrows', [unit_param(500), stress_param(70000)]) @pytest.mark.parametrize('n_feats', [unit_param(3), stress_param(1000)]) -@pytest.mark.parametrize('k', [unit_param(3), stress_param(50)]) +@pytest.mark.parametrize('k', [unit_param(3), unit_param(35), stress_param(50)]) @pytest.mark.parametrize("metric", valid_metrics()) def test_knn_x_none(input_type, nrows, n_feats, k, metric): X, _ = make_blobs(n_samples=nrows, @@ -467,7 +467,7 @@ def test_nn_downcast_fails(input_type, nrows, n_feats): @pytest.mark.parametrize('nrows', [unit_param(10), stress_param(1000)]) @pytest.mark.parametrize('n_feats', [unit_param(5), stress_param(100)]) @pytest.mark.parametrize("p", [2, 5]) -@pytest.mark.parametrize('k', [unit_param(3), stress_param(30)]) +@pytest.mark.parametrize('k', [unit_param(3), unit_param(35), stress_param(30)]) @pytest.mark.parametrize("metric", valid_metrics()) def test_knn_graph(input_type, mode, output_type, as_instance, nrows, n_feats, p, k, metric): @@ -507,6 +507,40 @@ def test_knn_graph(input_type, mode, output_type, as_instance, assert isspmatrix_csr(sparse_cu) +<<<<<<< Updated upstream +======= +@pytest.mark.parametrize('distance', ["euclidean", "haversine"]) +@pytest.mark.parametrize('n_neighbors', [2, 35]) +@pytest.mark.parametrize('nrows', [unit_param(500), stress_param(70000)]) +def test_nearest_neighbors_rbc(distance, n_neighbors, nrows): + X, y = make_blobs(n_samples=nrows, + n_features=2, random_state=0) + + knn_cu = cuKNN(metric=distance, algorithm="rbc") + knn_cu.fit(X) + + rbc_d, rbc_i = knn_cu.kneighbors(X[:int(nrows/2), :], + n_neighbors=n_neighbors) + + knn_cu2 = cuKNN(metric=distance, algorithm="brute") + knn_cu2.fit(X) + + brute_d, brute_i = knn_cu2.kneighbors(X[:int(nrows/2), :], + n_neighbors=n_neighbors) + + cp.testing.assert_allclose(rbc_d, brute_d, atol=5e-2, + rtol=1e-3) + rbc_i = cp.sort(rbc_i, axis=1) + brute_i = cp.sort(brute_i, axis=1) + + diff = rbc_i != brute_i + + # Using a very small tolerance for subtle differences + # in indices that result from + assert diff.ravel().sum() < 5 + + +>>>>>>> Stashed changes @pytest.mark.parametrize("metric", valid_metrics_sparse()) @pytest.mark.parametrize( 'nrows,ncols,density,n_neighbors,batch_size_index,batch_size_query', From 32e1ba7b1739bf51a88c6f937cfac12554857a6a Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Tue, 28 Sep 2021 12:29:26 -0400 Subject: [PATCH 08/13] Fixing style. Fixing umap bench --- python/cuml/test/test_benchmark.py | 2 +- python/cuml/test/test_nearest_neighbors.py | 15 ++++++++------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/python/cuml/test/test_benchmark.py b/python/cuml/test/test_benchmark.py index 1a5b57f902..5c0720c1de 100644 --- a/python/cuml/test/test_benchmark.py +++ b/python/cuml/test/test_benchmark.py @@ -183,7 +183,7 @@ def test_real_algos_runner(algo_name): pytest.xfail() runner = AccuracyComparisonRunner( - [20], [5], dataset_name='classification', test_fraction=0.20 + [50], [5], dataset_name='classification', test_fraction=0.20 ) results = runner.run(pair)[0] print(results) diff --git a/python/cuml/test/test_nearest_neighbors.py b/python/cuml/test/test_nearest_neighbors.py index 4cd28925b2..7b38426b05 100644 --- a/python/cuml/test/test_nearest_neighbors.py +++ b/python/cuml/test/test_nearest_neighbors.py @@ -323,7 +323,8 @@ def test_return_dists(): @pytest.mark.parametrize('nrows', [unit_param(500), quality_param(5000), stress_param(70000)]) @pytest.mark.parametrize('n_feats', [unit_param(3), stress_param(1000)]) -@pytest.mark.parametrize('k', [unit_param(3), unit_param(35), stress_param(50)]) +@pytest.mark.parametrize('k', [unit_param(3), unit_param(35), + stress_param(50)]) @pytest.mark.parametrize("metric", valid_metrics()) def test_knn_separate_index_search(input_type, nrows, n_feats, k, metric): X, _ = make_blobs(n_samples=nrows, @@ -377,8 +378,10 @@ def test_knn_separate_index_search(input_type, nrows, n_feats, k, metric): @pytest.mark.parametrize('input_type', ['dataframe', 'ndarray']) @pytest.mark.parametrize('nrows', [unit_param(500), stress_param(70000)]) -@pytest.mark.parametrize('n_feats', [unit_param(3), stress_param(1000)]) -@pytest.mark.parametrize('k', [unit_param(3), unit_param(35), stress_param(50)]) +@pytest.mark.parametrize('n_feats', [unit_param(3), + stress_param(1000)]) +@pytest.mark.parametrize('k', [unit_param(3), unit_param(35), + stress_param(50)]) @pytest.mark.parametrize("metric", valid_metrics()) def test_knn_x_none(input_type, nrows, n_feats, k, metric): X, _ = make_blobs(n_samples=nrows, @@ -467,7 +470,8 @@ def test_nn_downcast_fails(input_type, nrows, n_feats): @pytest.mark.parametrize('nrows', [unit_param(10), stress_param(1000)]) @pytest.mark.parametrize('n_feats', [unit_param(5), stress_param(100)]) @pytest.mark.parametrize("p", [2, 5]) -@pytest.mark.parametrize('k', [unit_param(3), unit_param(35), stress_param(30)]) +@pytest.mark.parametrize('k', [unit_param(3), unit_param(35), + stress_param(30)]) @pytest.mark.parametrize("metric", valid_metrics()) def test_knn_graph(input_type, mode, output_type, as_instance, nrows, n_feats, p, k, metric): @@ -507,8 +511,6 @@ def test_knn_graph(input_type, mode, output_type, as_instance, assert isspmatrix_csr(sparse_cu) -<<<<<<< Updated upstream -======= @pytest.mark.parametrize('distance', ["euclidean", "haversine"]) @pytest.mark.parametrize('n_neighbors', [2, 35]) @pytest.mark.parametrize('nrows', [unit_param(500), stress_param(70000)]) @@ -540,7 +542,6 @@ def test_nearest_neighbors_rbc(distance, n_neighbors, nrows): assert diff.ravel().sum() < 5 ->>>>>>> Stashed changes @pytest.mark.parametrize("metric", valid_metrics_sparse()) @pytest.mark.parametrize( 'nrows,ncols,density,n_neighbors,batch_size_index,batch_size_query', From 277df4d4a27aed2a1ed315167db3681b4cb9136e Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Tue, 28 Sep 2021 12:43:24 -0400 Subject: [PATCH 09/13] Removing some parameters from test_ivfpq to speed it up a little bit --- python/cuml/test/test_nearest_neighbors.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/cuml/test/test_nearest_neighbors.py b/python/cuml/test/test_nearest_neighbors.py index 7b38426b05..b05d78293e 100644 --- a/python/cuml/test/test_nearest_neighbors.py +++ b/python/cuml/test/test_nearest_neighbors.py @@ -215,11 +215,11 @@ def test_ivfflat_pred(nrows, ncols, n_neighbors, nlist): @pytest.mark.parametrize("nlist", [8]) -@pytest.mark.parametrize("M", [16, 32]) -@pytest.mark.parametrize("n_bits", [4, 6]) +@pytest.mark.parametrize("M", [32]) +@pytest.mark.parametrize("n_bits", [4]) @pytest.mark.parametrize("usePrecomputedTables", [False, True]) @pytest.mark.parametrize("nrows", [4000]) -@pytest.mark.parametrize("ncols", [128, 512]) +@pytest.mark.parametrize("ncols", [64, 512]) @pytest.mark.parametrize("n_neighbors", [8]) def test_ivfpq_pred(nrows, ncols, n_neighbors, nlist, M, n_bits, usePrecomputedTables): From 96eefbec709b7ebafb15f241472df54a2910766f Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Tue, 28 Sep 2021 12:49:47 -0400 Subject: [PATCH 10/13] Adding assertion for n_neighbors to trustworthiness tests --- python/cuml/metrics/trustworthiness.pyx | 3 +++ python/cuml/test/test_trustworthiness.py | 8 ++++++++ 2 files changed, 11 insertions(+) diff --git a/python/cuml/metrics/trustworthiness.pyx b/python/cuml/metrics/trustworthiness.pyx index 9bb907f81d..d05be9ec4d 100644 --- a/python/cuml/metrics/trustworthiness.pyx +++ b/python/cuml/metrics/trustworthiness.pyx @@ -88,6 +88,9 @@ def trustworthiness(X, X_embedded, handle=None, n_neighbors=5, warnings.warn("Parameter should_downcast is deprecated, use " "convert_dtype instead. ") + if n_neighbors > X.shape[0]: + raise ValueError("n_neighbors must be <= the number of rows.") + handle = cuml.raft.common.handle.Handle() if handle is None else handle cdef uintptr_t d_X_ptr diff --git a/python/cuml/test/test_trustworthiness.py b/python/cuml/test/test_trustworthiness.py index 7a2a117067..17a623bd3f 100644 --- a/python/cuml/test/test_trustworthiness.py +++ b/python/cuml/test/test_trustworthiness.py @@ -48,3 +48,11 @@ def test_trustworthiness(input_type, n_samples, n_features, n_components, cu_score = cuml_trustworthiness(X, X_embedded, batch_size=batch_size) assert abs(cu_score - sk_score) <= 1e-3 + + +def test_trustworthiness_invalid_input(): + X, y = make_blobs(n_samples=10, centers=1, + n_features=2, random_state=32) + + with pytest.raises(ValueError): + cuml_trustworthiness(X, X, n_neighbors=50) From 5aa378867fe2454d53093ba8b02db202b2fcbd74 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Tue, 28 Sep 2021 13:37:26 -0500 Subject: [PATCH 11/13] FIX Reduce space of ivfpq pytest --- python/cuml/test/test_nearest_neighbors.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cuml/test/test_nearest_neighbors.py b/python/cuml/test/test_nearest_neighbors.py index 66c7fb8f53..25f440419b 100644 --- a/python/cuml/test/test_nearest_neighbors.py +++ b/python/cuml/test/test_nearest_neighbors.py @@ -216,10 +216,10 @@ def test_ivfflat_pred(nrows, ncols, n_neighbors, nlist): @pytest.mark.parametrize("nlist", [8]) @pytest.mark.parametrize("M", [16, 32]) -@pytest.mark.parametrize("n_bits", [4, 6]) +@pytest.mark.parametrize("n_bits", [4, stress_param(6)]) @pytest.mark.parametrize("usePrecomputedTables", [False, True]) @pytest.mark.parametrize("nrows", [4000]) -@pytest.mark.parametrize("ncols", [128, 512]) +@pytest.mark.parametrize("ncols", [64, stress_param(512)]) @pytest.mark.parametrize("n_neighbors", [8]) def test_ivfpq_pred(nrows, ncols, n_neighbors, nlist, M, n_bits, usePrecomputedTables): From 1aa8771b8e12c2bbf829850fd84edebda396c8a3 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Tue, 28 Sep 2021 15:38:22 -0400 Subject: [PATCH 12/13] Removing rbc pytest --- python/cuml/test/test_nearest_neighbors.py | 31 ---------------------- 1 file changed, 31 deletions(-) diff --git a/python/cuml/test/test_nearest_neighbors.py b/python/cuml/test/test_nearest_neighbors.py index b05d78293e..340b8ff373 100644 --- a/python/cuml/test/test_nearest_neighbors.py +++ b/python/cuml/test/test_nearest_neighbors.py @@ -511,37 +511,6 @@ def test_knn_graph(input_type, mode, output_type, as_instance, assert isspmatrix_csr(sparse_cu) -@pytest.mark.parametrize('distance', ["euclidean", "haversine"]) -@pytest.mark.parametrize('n_neighbors', [2, 35]) -@pytest.mark.parametrize('nrows', [unit_param(500), stress_param(70000)]) -def test_nearest_neighbors_rbc(distance, n_neighbors, nrows): - X, y = make_blobs(n_samples=nrows, - n_features=2, random_state=0) - - knn_cu = cuKNN(metric=distance, algorithm="rbc") - knn_cu.fit(X) - - rbc_d, rbc_i = knn_cu.kneighbors(X[:int(nrows/2), :], - n_neighbors=n_neighbors) - - knn_cu2 = cuKNN(metric=distance, algorithm="brute") - knn_cu2.fit(X) - - brute_d, brute_i = knn_cu2.kneighbors(X[:int(nrows/2), :], - n_neighbors=n_neighbors) - - cp.testing.assert_allclose(rbc_d, brute_d, atol=5e-2, - rtol=1e-3) - rbc_i = cp.sort(rbc_i, axis=1) - brute_i = cp.sort(brute_i, axis=1) - - diff = rbc_i != brute_i - - # Using a very small tolerance for subtle differences - # in indices that result from - assert diff.ravel().sum() < 5 - - @pytest.mark.parametrize("metric", valid_metrics_sparse()) @pytest.mark.parametrize( 'nrows,ncols,density,n_neighbors,batch_size_index,batch_size_query', From 40c95b6edafbb34faab0bed623a44598eb04ea84 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Tue, 28 Sep 2021 22:58:19 -0400 Subject: [PATCH 13/13] Fixing test --- python/cuml/test/test_nearest_neighbors.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cuml/test/test_nearest_neighbors.py b/python/cuml/test/test_nearest_neighbors.py index 340b8ff373..1aa0e86290 100644 --- a/python/cuml/test/test_nearest_neighbors.py +++ b/python/cuml/test/test_nearest_neighbors.py @@ -323,7 +323,7 @@ def test_return_dists(): @pytest.mark.parametrize('nrows', [unit_param(500), quality_param(5000), stress_param(70000)]) @pytest.mark.parametrize('n_feats', [unit_param(3), stress_param(1000)]) -@pytest.mark.parametrize('k', [unit_param(3), unit_param(35), +@pytest.mark.parametrize('k', [unit_param(3), stress_param(50)]) @pytest.mark.parametrize("metric", valid_metrics()) def test_knn_separate_index_search(input_type, nrows, n_feats, k, metric): @@ -467,7 +467,7 @@ def test_nn_downcast_fails(input_type, nrows, n_feats): ("ndarray", "connectivity", "cupy", False), ("ndarray", "distance", "numpy", False), ]) -@pytest.mark.parametrize('nrows', [unit_param(10), stress_param(1000)]) +@pytest.mark.parametrize('nrows', [unit_param(100), stress_param(1000)]) @pytest.mark.parametrize('n_feats', [unit_param(5), stress_param(100)]) @pytest.mark.parametrize("p", [2, 5]) @pytest.mark.parametrize('k', [unit_param(3), unit_param(35),