diff --git a/BUILD.md b/BUILD.md index b10dc87f89..1bf3783fae 100644 --- a/BUILD.md +++ b/BUILD.md @@ -140,15 +140,47 @@ The following example shows how to use the `libraft-distance` API with the pre-c ### Building RAFT C++ from source in cmake -RAFT uses the [RAPIDS cmake](https://github.com/rapidsai/rapids-cmake) library, so it can be easily included into downstream projects. RAPIDS cmake provides a convenience layer around the [Cmake Package Manager (CPM)](https://github.com/cpm-cmake/CPM.cmake). The following example is similar to building RAFT itself from source but allows it to be done in cmake, providing the `raft::raft` link target and `RAFT_INCLUDE_DIR` for includes. The `COMPILE_LIBRARIES` option enables the building of the shared libraries +RAFT uses the [RAPIDS cmake](https://github.com/rapidsai/rapids-cmake) library, so it can be easily included into downstream projects. RAPIDS cmake provides a convenience layer around the [Cmake Package Manager (CPM)](https://github.com/cpm-cmake/CPM.cmake). The following example is similar to building RAFT itself from source but allows it to be done in cmake, providing the `raft::raft` link target and `RAFT_INCLUDE_DIR` for includes. The `COMPILE_LIBRARIES` option enables the building of the shared libraries. 
+ +The following `cmake` snippet enables a flexible configuration of RAFT: ```cmake -function(find_and_configure_raft) - set(oneValueArgs VERSION FORK PINNED_TAG USE_FAISS_STATIC COMPILE_LIBRARIES ENABLE_NN_DEPENDENCIES) +set(RAFT_VERSION "22.04") + +function(find_and_configure_raft) + set(oneValueArgs VERSION FORK PINNED_TAG USE_FAISS_STATIC + COMPILE_LIBRARIES ENABLE_NN_DEPENDENCIES CLONE_ON_PIN + USE_NN_LIBRARY USE_DISTANCE_LIBRARY) cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} ) + #----------------------------------------------------- + # Clone RAFT locally if PINNED_TAG has been changed + #----------------------------------------------------- + if(PKG_CLONE_ON_PIN AND NOT PKG_PINNED_TAG STREQUAL "branch-${RAFT_VERSION}") + message("Pinned tag found: ${PKG_PINNED_TAG}. Cloning raft locally.") + set(CPM_DOWNLOAD_raft ON) + set(CMAKE_IGNORE_PATH "${CMAKE_INSTALL_PREFIX}/include/raft;${CMAKE_IGNORE_PATH}") + endif() + + #----------------------------------------------------- + # Add components + #----------------------------------------------------- + + string(APPEND RAFT_COMPONENTS "") + if(PKG_USE_NN_LIBRARY) + string(APPEND RAFT_COMPONENTS " nn") + endif() + + if(PKG_USE_DISTANCE_LIBRARY) + string(APPEND RAFT_COMPONENTS " distance") + endif() + + #----------------------------------------------------- + # Invoke CPM find_package() + #----------------------------------------------------- + rapids_cpm_find(raft ${PKG_VERSION} GLOBAL_TARGETS raft::raft BUILD_EXPORT_SET proj-exports @@ -170,11 +202,19 @@ endfunction() # Change pinned tag here to test a commit in CI # To use a different RAFT locally, set the CMake variable # CPM_raft_SOURCE=/path/to/local/raft -find_and_configure_raft(VERSION 22.02.00 +find_and_configure_raft(VERSION ${RAFT_VERSION}.00 FORK rapidsai - PINNED_TAG branch-22.02 + PINNED_TAG branch-${RAFT_VERSION} + + # When PINNED_TAG above doesn't match cuml, + # force local raft clone in build directory + # even if 
it's already installed. + CLONE_ON_PIN ON + COMPILE_LIBRARIES NO - ENABLE_NN_DEPENDENCIES NO + USE_NN_LIBRARY NO + USE_DISTANCE_LIBRARY NO + ENABLE_NN_DEPENDENCIES NO # This builds FAISS if not installed USE_FAISS_STATIC NO ) ``` diff --git a/README.md b/README.md index 9260c755dd..a79679c579 100755 --- a/README.md +++ b/README.md @@ -3,12 +3,11 @@ RAFT contains fundamental widely-used algorithms and primitives for data science, graph and machine learning. The algorithms are CUDA-accelerated and form building-blocks for rapidly composing analytics in the [RAPIDS](https://rapids.ai) ecosystem. By taking a primitives-based approach to algorithm development, RAFT -1. accelerates algorithm construction time -2. reduces the maintenance burden by maximizing reuse across projects, and -3. centralizes the core computations, allowing future optimizations to benefit all algorithms that use them. - -At its core, RAFT is a header-only C++ library with optional shared libraries that span the following categories: +- accelerates algorithm construction time +- reduces the maintenance burden by maximizing reuse across projects, and +- centralizes the core computations, allowing future optimizations to benefit all algorithms that use them. 
+The algorithms in RAFT span the following general categories: ##### | Category | Examples | | --- | --- | @@ -16,18 +15,20 @@ At its core, RAFT is a header-only C++ library with optional shared libraries th | **Data Generation** | sparse, spatial, machine learning datasets | | **Dense Linear Algebra** | matrix arithmetic, norms, factorization, least squares, svd & eigenvalue problems | | **Spatial** | pairwise distances, nearest neighbors, neighborhood graph construction | -| **Sparse Operations** | linear algebra, eigenvalue problems, slicing, symmetrization, connected component labeling | +| **Sparse Operations** | linear algebra, eigenvalue problems, slicing, symmetrization, labeling | | **Basic Clustering** | spectral clustering, hierarchical clustering, k-means | -| **Combinatorial Optimization** | linear assignment problem, minimum spanning forest | -| **Iterative Solvers** | lanczos | +| **Optimization** | combinatorial optimization, iterative solvers | | **Statistics** | sampling, moments and summary statistics, metrics | | **Distributed Tools** | multi-node multi-gpu infrastructure | -RAFT also provides a Python library that includes +RAFT provides a header-only C++ library and pre-compiled shared libraries that can 1) speed up compile times and 2) enable the APIs to be used without CUDA-enabled compilers. + +RAFT also provides a Python library that is currently limited to 1. a python wrapper around the `raft::handle_t` for managing cuda library resources -2. building multi-node multi-GPU algorithms that leverage [Dask](https://dask.org/) +2. definitions for using `raft::handle_t` directly in cython +3. tools for building multi-node multi-GPU algorithms that leverage [Dask](https://dask.org/) -We are continuing to improve the Python API by exposing the core algorithms and primitives from the categories above. +The Python API is being improved to wrap the algorithms and primitives from the categories above. 
## Getting started @@ -65,9 +66,82 @@ raft::distance::pairwise_distance(handle, input.data(), input.data(), workspace.data(), metric); ``` -## Build/Install RAFT +## Installing + +RAFT can be installed through conda, cmake-package-manager (cpm), or by building the repository from source. + +### Conda + +The easiest way to install RAFT is through conda and several packages are provided. +- `libraft-headers` contains all the CUDA/C++ headers +- `libraft-nn` (optional) contains precompiled shared libraries for the nearest neighbors algorithms. If FAISS is not already installed in your environment, this will need to be installed to use the nearest neighbors headers. +- `libraft-distance` (optional) contains shared libraries for distance algorithms. +- `pyraft` (optional) contains the Python library + +To install RAFT with conda (change to `rapidsai-nightly` for more up-to-date but less stable nightly packages) +```bash +conda install -c rapidsai libraft-headers libraft-nn libraft-distance pyraft +``` + +After installing RAFT, `find_package(raft COMPONENTS nn distance)` can be used in your CUDA/C++ build. Note that the `COMPONENTS` are optional and will depend on the packages installed. + +### CPM + +RAFT uses the [RAPIDS cmake](https://github.com/rapidsai/rapids-cmake) library, which makes it simple to include in downstream cmake projects. RAPIDS cmake provides a convenience layer around the [Cmake Package Manager (CPM)](https://github.com/cpm-cmake/CPM.cmake). + +After [installing](https://github.com/rapidsai/rapids-cmake#installation) rapids-cmake in your project, you can begin using RAFT by placing the code snippet below in a file named `get_raft.cmake` and including it in your cmake build with `include(get_raft.cmake)`. This will create the `raft::raft` target, which you can add to the link libraries of your artifacts. 
+ +```cmake + +set(RAFT_VERSION "22.04") + +function(find_and_configure_raft) + set(oneValueArgs VERSION FORK PINNED_TAG USE_FAISS_STATIC + COMPILE_LIBRARIES ENABLE_NN_DEPENDENCIES) + cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN} ) + + #----------------------------------------------------- + # Invoke CPM find_package() + #----------------------------------------------------- + + rapids_cpm_find(raft ${PKG_VERSION} + GLOBAL_TARGETS raft::raft + BUILD_EXPORT_SET proj-exports + INSTALL_EXPORT_SET proj-exports + CPM_ARGS + GIT_REPOSITORY https://github.com/${PKG_FORK}/raft.git + GIT_TAG ${PKG_PINNED_TAG} + SOURCE_SUBDIR cpp + OPTIONS + "BUILD_TESTS OFF" + "RAFT_ENABLE_NN_DEPENDENCIES ${PKG_ENABLE_NN_DEPENDENCIES}" + "RAFT_USE_FAISS_STATIC ${PKG_USE_FAISS_STATIC}" + "RAFT_COMPILE_LIBRARIES ${PKG_COMPILE_LIBRARIES}" + ) + +endfunction() + +# Change pinned tag here to test a commit in CI +# To use a different RAFT locally, set the CMake variable +# CPM_raft_SOURCE=/path/to/local/raft +find_and_configure_raft(VERSION ${RAFT_VERSION}.00 + FORK rapidsai + PINNED_TAG branch-${RAFT_VERSION} + + COMPILE_LIBRARIES NO + ENABLE_NN_DEPENDENCIES NO + USE_FAISS_STATIC NO +) +``` + +### Source + +The easiest way to build RAFT from source is to use the `build.sh` script at the root of the repository: +1. create an environment with the RAFT dependencies: `conda env create --name raft_dev -f conda/environments/raft_dev_cuda11.5.yml` +2. run the build script from the repository root: `./build.sh pyraft libraft --compile-libs` -Refer to the [Build](BUILD.md) instructions for details on building and including the RAFT library in downstream projects. +The [Build](BUILD.md) instructions contain more details on building RAFT from source and including it in downstream projects. You can also find a more comprehensive version of the above CPM code snippet in the [Building RAFT C++ from source](BUILD.md#build_cxx_source) guide. 
## Folder Structure and Contents diff --git a/build.sh b/build.sh index 1c581eff19..9a3295321f 100755 --- a/build.sh +++ b/build.sh @@ -133,9 +133,6 @@ fi if hasArg --buildfaiss; then BUILD_STATIC_FAISS=ON fi -if hasArg --singlegpu; then - SINGLEGPU="--singlegpu" -fi if hasArg --nvtx; then NVTX=ON fi