Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add option to clone RAFT even if it is in the environment #4217

Merged
merged 15 commits into from
Sep 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ HELP="$0 [<target> ...] [<flag> ...]
--codecov - Enable code coverage support by compiling with Cython linetracing
and profiling enabled (WARNING: Impacts performance)
--ccache - Use ccache to cache previous compilations
--nocloneraft - CMake will clone RAFT even if it is in the environment, use this flag to disable that behavior

default action (no args) is to build and install 'libcuml', 'cuml', and 'prims' targets only for the detected GPU arch

Expand Down Expand Up @@ -77,6 +78,7 @@ BUILD_CUML_TESTS=ON
BUILD_CUML_MG_TESTS=OFF
BUILD_STATIC_FAISS=OFF
CMAKE_LOG_LEVEL=WARNING
DISABLE_FORCE_CLONE_RAFT=OFF

# Set defaults for vars that may not have been defined externally
# FIXME: if INSTALL_PREFIX is not set, check PREFIX, then check
Expand Down Expand Up @@ -129,6 +131,7 @@ LONG_ARGUMENT_LIST=(
"codecov"
"ccache"
"nolibcumltest"
"nocloneraft"
)

# Short arguments
Expand Down Expand Up @@ -188,6 +191,9 @@ while true; do
--nolibcumltest )
BUILD_CUML_TESTS=OFF
;;
--nocloneraft )
DISABLE_FORCE_CLONE_RAFT=ON
;;
--)
shift
break
Expand Down Expand Up @@ -239,6 +245,7 @@ if completeBuild || hasArg libcuml || hasArg prims || hasArg bench || hasArg pri
-DBUILD_CUML_TESTS=${BUILD_CUML_TESTS} \
-DBUILD_CUML_MPI_COMMS=${BUILD_CUML_MG_TESTS} \
-DBUILD_CUML_MG_TESTS=${BUILD_CUML_MG_TESTS} \
-DDISABLE_FORCE_CLONE_RAFT=${DISABLE_FORCE_CLONE_RAFT} \
-DNVTX=${NVTX} \
-DUSE_CCACHE=${CCACHE} \
-DDISABLE_DEPRECATION_WARNING=${BUILD_DISABLE_DEPRECATION_WARNING} \
Expand Down
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ option(CUDA_ENABLE_KERNEL_INFO "Enable kernel resource usage info" OFF)
option(CUDA_ENABLE_LINE_INFO "Enable lineinfo in nvcc" OFF)
option(DETECT_CONDA_ENV "Enable detection of conda environment for dependencies" ON)
option(DISABLE_DEPRECATION_WARNINGS "Disable depreaction warnings " ON)
option(DISABLE_FORCE_CLONE_RAFT "By default, CPM will clone RAFT even if it's already in the environment. Set to disable that behavior." OFF)
option(DISABLE_OPENMP "Disable OpenMP" OFF)
option(ENABLE_CUMLPRIMS_MG "Enable algorithms that use libcumlprims_mg" ON)
option(NVTX "Enable nvtx markers" OFF)
Expand Down
15 changes: 14 additions & 1 deletion cpp/cmake/thirdparty/get_raft.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@ function(find_and_configure_raft)
cmake_parse_arguments(PKG "${options}" "${oneValueArgs}"
"${multiValueArgs}" ${ARGN} )

if(DEFINED CPM_raft_SOURCE OR NOT DISABLE_FORCE_CLONE_RAFT)
set(CPM_DL_ALL_CACHE ${CPM_DOWNLOAD_ALL})
set(CPM_DOWNLOAD_ALL ON)
endif()

rapids_cpm_find(raft ${PKG_VERSION}
GLOBAL_TARGETS raft::raft
BUILD_EXPORT_SET cuml-exports
Expand All @@ -32,7 +37,15 @@ function(find_and_configure_raft)
"BUILD_TESTS OFF"
)

message(VERBOSE "CUML: Using RAFT located in ${raft_SOURCE_DIR}")
if(raft_ADDED)
message(VERBOSE "CUML: Using RAFT located in ${raft_SOURCE_DIR}")
else()
message(VERBOSE "CUML: Using RAFT located in ${raft_DIR}")
endif()

if(DEFINED CPM_raft_SOURCE OR NOT DISABLE_FORCE_CLONE_RAFT)
set(CPM_DOWNLOAD_ALL ${CPM_DL_ALL_CACHE})
endif()

endfunction()

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/fil/fil.cu
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ cat_sets_owner::cat_sets_owner(const std::vector<cat_feature_counters>& cf)
max_matching.push_back(cnt.max_matching);
bits_size += categorical_sets::sizeof_mask_from_max_matching(cnt.max_matching) * cnt.n_nodes;

int fid = max_matching.size();
auto fid = max_matching.size();
RAFT_EXPECTS(
cnt.max_matching >= -1, "@fid %zu: max_matching invalid (%d)", fid, cnt.max_matching);
RAFT_EXPECTS(cnt.n_nodes >= 0, "@fid %zu: n_nodes invalid (%d)", fid, cnt.n_nodes);
Expand Down
2 changes: 1 addition & 1 deletion cpp/src_prims/linalg/lstsq.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ struct DeviceEvent {
}
~DeviceEvent()
{
if (e != nullptr) CUDA_CHECK(cudaEventDestroy(e));
if (e != nullptr) CUDA_CHECK_NO_THROW(cudaEventDestroy(e));
}
operator cudaEvent_t() const { return e; }
void record(cudaStream_t stream)
Expand Down
3 changes: 3 additions & 0 deletions python/cuml/metrics/trustworthiness.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,9 @@ def trustworthiness(X, X_embedded, handle=None, n_neighbors=5,
warnings.warn("Parameter should_downcast is deprecated, use "
"convert_dtype instead. ")

if n_neighbors > X.shape[0]:
raise ValueError("n_neighbors must be <= the number of rows.")

handle = cuml.raft.common.handle.Handle() if handle is None else handle

cdef uintptr_t d_X_ptr
Expand Down
9 changes: 5 additions & 4 deletions python/cuml/test/test_benchmark.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2019, NVIDIA CORPORATION.
# Copyright (c) 2019-2021, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -170,8 +170,9 @@ def predict(self, X):

# Only test a few algorithms (which collectively span several types)
# to reduce runtime burden
@pytest.mark.parametrize('algo_name', ['UMAP-Supervised',
'DBSCAN',
# skipping UMAP-Supervised due to issue
# https://github.com/rapidsai/cuml/issues/4243
@pytest.mark.parametrize('algo_name', ['DBSCAN',
'LogisticRegression',
'ElasticNet',
'FIL'])
Expand All @@ -183,7 +184,7 @@ def test_real_algos_runner(algo_name):
pytest.xfail()

runner = AccuracyComparisonRunner(
[20], [5], dataset_name='classification', test_fraction=0.20
[50], [5], dataset_name='classification', test_fraction=0.20
)
results = runner.run(pair)[0]
print(results)
Expand Down
20 changes: 12 additions & 8 deletions python/cuml/test/test_nearest_neighbors.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,11 +215,11 @@ def test_ivfflat_pred(nrows, ncols, n_neighbors, nlist):


@pytest.mark.parametrize("nlist", [8])
@pytest.mark.parametrize("M", [16, 32])
@pytest.mark.parametrize("n_bits", [4, 6])
@pytest.mark.parametrize("M", [32])
@pytest.mark.parametrize("n_bits", [4])
@pytest.mark.parametrize("usePrecomputedTables", [False, True])
@pytest.mark.parametrize("nrows", [4000])
@pytest.mark.parametrize("ncols", [128, 512])
@pytest.mark.parametrize("ncols", [64, 512])
@pytest.mark.parametrize("n_neighbors", [8])
def test_ivfpq_pred(nrows, ncols, n_neighbors,
nlist, M, n_bits, usePrecomputedTables):
Expand Down Expand Up @@ -323,7 +323,8 @@ def test_return_dists():
@pytest.mark.parametrize('nrows', [unit_param(500), quality_param(5000),
stress_param(70000)])
@pytest.mark.parametrize('n_feats', [unit_param(3), stress_param(1000)])
@pytest.mark.parametrize('k', [unit_param(3), stress_param(50)])
@pytest.mark.parametrize('k', [unit_param(3),
stress_param(50)])
@pytest.mark.parametrize("metric", valid_metrics())
def test_knn_separate_index_search(input_type, nrows, n_feats, k, metric):
X, _ = make_blobs(n_samples=nrows,
Expand Down Expand Up @@ -377,8 +378,10 @@ def test_knn_separate_index_search(input_type, nrows, n_feats, k, metric):

@pytest.mark.parametrize('input_type', ['dataframe', 'ndarray'])
@pytest.mark.parametrize('nrows', [unit_param(500), stress_param(70000)])
@pytest.mark.parametrize('n_feats', [unit_param(3), stress_param(1000)])
@pytest.mark.parametrize('k', [unit_param(3), stress_param(50)])
@pytest.mark.parametrize('n_feats', [unit_param(3),
stress_param(1000)])
@pytest.mark.parametrize('k', [unit_param(3), unit_param(35),
stress_param(50)])
@pytest.mark.parametrize("metric", valid_metrics())
def test_knn_x_none(input_type, nrows, n_feats, k, metric):
X, _ = make_blobs(n_samples=nrows,
Expand Down Expand Up @@ -464,10 +467,11 @@ def test_nn_downcast_fails(input_type, nrows, n_feats):
("ndarray", "connectivity", "cupy", False),
("ndarray", "distance", "numpy", False),
])
@pytest.mark.parametrize('nrows', [unit_param(10), stress_param(1000)])
@pytest.mark.parametrize('nrows', [unit_param(100), stress_param(1000)])
@pytest.mark.parametrize('n_feats', [unit_param(5), stress_param(100)])
@pytest.mark.parametrize("p", [2, 5])
@pytest.mark.parametrize('k', [unit_param(3), stress_param(30)])
@pytest.mark.parametrize('k', [unit_param(3), unit_param(35),
stress_param(30)])
@pytest.mark.parametrize("metric", valid_metrics())
def test_knn_graph(input_type, mode, output_type, as_instance,
nrows, n_feats, p, k, metric):
Expand Down
8 changes: 8 additions & 0 deletions python/cuml/test/test_trustworthiness.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,11 @@ def test_trustworthiness(input_type, n_samples, n_features, n_components,
cu_score = cuml_trustworthiness(X, X_embedded, batch_size=batch_size)

assert abs(cu_score - sk_score) <= 1e-3


def test_trustworthiness_invalid_input():
X, y = make_blobs(n_samples=10, centers=1,
n_features=2, random_state=32)

with pytest.raises(ValueError):
cuml_trustworthiness(X, X, n_neighbors=50)