diff --git a/.github/ops-bot.yaml b/.github/ops-bot.yaml
index 2a90a9034a..236696d948 100644
--- a/.github/ops-bot.yaml
+++ b/.github/ops-bot.yaml
@@ -5,4 +5,4 @@ auto_merger: true
branch_checker: true
label_checker: true
release_drafter: true
-copy_prs: false
+copy_prs: true
diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
new file mode 100644
index 0000000000..0a681b864b
--- /dev/null
+++ b/.github/workflows/wheels.yml
@@ -0,0 +1,72 @@
+name: RAFT wheels
+
+on:
+ workflow_call:
+ inputs:
+ versioneer-override:
+ type: string
+ default: ''
+ build-tag:
+ type: string
+ default: ''
+ branch:
+ required: true
+ type: string
+ date:
+ required: true
+ type: string
+ sha:
+ required: true
+ type: string
+ build-type:
+ type: string
+ default: nightly
+
+concurrency:
+ group: "raft-${{ github.workflow }}-${{ github.ref }}"
+ cancel-in-progress: true
+
+jobs:
+ pylibraft-wheel:
+ uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@main
+ with:
+ repo: rapidsai/raft
+
+ build-type: ${{ inputs.build-type }}
+ branch: ${{ inputs.branch }}
+ sha: ${{ inputs.sha }}
+ date: ${{ inputs.date }}
+
+ package-dir: python/pylibraft
+ package-name: pylibraft
+
+ python-package-versioneer-override: ${{ inputs.versioneer-override }}
+ python-package-build-tag: ${{ inputs.build-tag }}
+
+ skbuild-configure-options: "-DRAFT_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DFIND_RAFT_CPP=OFF"
+
+ test-extras: test
+ test-unittest: "python -m pytest -v ./python/pylibraft/pylibraft/test"
+ secrets: inherit
+ raft-dask-wheel:
+ needs: pylibraft-wheel
+ uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@main
+ with:
+ repo: rapidsai/raft
+
+ build-type: ${{ inputs.build-type }}
+ branch: ${{ inputs.branch }}
+ sha: ${{ inputs.sha }}
+ date: ${{ inputs.date }}
+
+ package-dir: python/raft-dask
+ package-name: raft_dask
+
+ python-package-versioneer-override: ${{ inputs.versioneer-override }}
+ python-package-build-tag: ${{ inputs.build-tag }}
+
+ skbuild-configure-options: "-DRAFT_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DFIND_RAFT_CPP=OFF"
+
+ test-extras: test
+ test-unittest: "python -m pytest -v ./python/raft-dask/raft_dask/test"
+ secrets: inherit
diff --git a/.gitignore b/.gitignore
index 22c0e8a4a0..5d148b836b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -49,3 +49,6 @@ _skbuild
## doxygen build check inside ci/checks/style.sh
doxygen_check/
+
+## cibuildwheel
+/wheelhouse
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000000..1c244200d1
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,102 @@
+# Copyright (c) 2022, NVIDIA CORPORATION.
+
+repos:
+ - repo: https://github.com/PyCQA/isort
+ rev: 5.10.1
+ hooks:
+ - id: isort
+ # Use the config file specific to each subproject so that each
+ # project can specify its own first/third-party packages.
+ args: ["--config-root=python/", "--resolve-all-configs"]
+ files: python/.*
+ types_or: [python, cython, pyi]
+ - repo: https://github.com/psf/black
+ rev: 22.3.0
+ hooks:
+ - id: black
+ files: python/.*
+ # Explicitly specify the pyproject.toml at the repo root, not per-project.
+ args: ["--config", "pyproject.toml"]
+ exclude: .*_version.py|.*versioneer.py  # single regex; '|' separates the alternatives
+ - repo: https://github.com/PyCQA/flake8
+ rev: 5.0.4
+ hooks:
+ - id: flake8
+ args: ["--config=setup.cfg"]
+ files: python/.*$
+ types: [file]
+ types_or: [python, cython]
+ additional_dependencies: ["flake8-force"]
+ - repo: https://github.com/pre-commit/mirrors-mypy
+ rev: 'v0.971'
+ hooks:
+ - id: mypy
+ additional_dependencies: [types-cachetools]
+ args: ["--config-file=setup.cfg",
+ "python/pylibraft/pylibraft",
+ "python/raft-dask/raft_dask"]
+ pass_filenames: false
+ exclude: .*_version.py
+ - repo: https://github.com/PyCQA/pydocstyle
+ rev: 6.1.1
+ hooks:
+ - id: pydocstyle
+ args: ["--config=setup.cfg"]
+ - repo: https://github.com/pre-commit/mirrors-clang-format
+ rev: v11.1.0
+ hooks:
+ - id: clang-format
+ types_or: [c, c++, cuda]
+ args: ["-fallback-style=none", "-style=file", "-i"]
+ exclude: cpp/include/raft/thirdparty/.*
+ - repo: local
+ hooks:
+ - id: no-deprecationwarning
+ name: no-deprecationwarning
+ description: 'Enforce that DeprecationWarning is not introduced (use FutureWarning instead)'
+ entry: '(category=|\s)DeprecationWarning[,)]'
+ language: pygrep
+ types_or: [python, cython]
+ - id: cmake-format
+ name: cmake-format
+ entry: ./cpp/scripts/run-cmake-format.sh cmake-format
+ language: python
+ types: [cmake]
+ exclude: .*/thirdparty/.*
+ # Note that pre-commit autoupdate does not update the versions
+ # of dependencies, so we'll have to update this manually.
+ additional_dependencies:
+ - cmakelang==0.6.13
+ verbose: true
+ require_serial: true
+ - id: cmake-lint
+ name: cmake-lint
+ entry: ./cpp/scripts/run-cmake-format.sh cmake-lint
+ language: python
+ types: [cmake]
+ # Note that pre-commit autoupdate does not update the versions
+ # of dependencies, so we'll have to update this manually.
+ additional_dependencies:
+ - cmakelang==0.6.13
+ verbose: true
+ require_serial: true
+ exclude: .*/thirdparty/.*
+ - id: copyright-check
+ name: copyright-check
+ entry: python ./ci/checks/copyright.py --git-modified-only --update-current-year
+ language: python
+ pass_filenames: false
+ additional_dependencies: [gitpython]
+ - id: include-check
+ name: include-check
+ entry: python ./cpp/scripts/include_checker.py cpp/bench cpp/include cpp/test
+ pass_filenames: false
+ language: python
+ additional_dependencies: [gitpython]
+ - repo: https://github.com/codespell-project/codespell
+ rev: v2.1.0
+ hooks:
+ - id: codespell
+
+default_language_version:
+ python: python3
diff --git a/README.md b/README.md
index ddaf8b3f8d..e48a1b6193 100755
--- a/README.md
+++ b/README.md
@@ -1,5 +1,18 @@
#
RAFT: Reusable Accelerated Functions and Tools
+[![Build Status](https://gpuci.gpuopenanalytics.com/job/rapidsai/job/gpuci/job/raft/job/branches/job/raft-branch-pipeline/badge/icon)](https://gpuci.gpuopenanalytics.com/job/rapidsai/job/gpuci/job/raft/job/branches/job/raft-branch-pipeline/)
+
+## Resources
+
+- [RAFT Reference Documentation](https://docs.rapids.ai/api/raft/stable/): API Documentation.
+- [RAFT Getting Started](./docs/source/quick_start.md): Getting started with RAFT.
+- [Build and Install RAFT](./docs/source/build.md): Instructions for installing and building RAFT.
+- [RAPIDS Community](https://rapids.ai/community.html): Get help, contribute, and collaborate.
+- [GitHub repository](https://github.com/rapidsai/raft): Download the RAFT source code.
+- [Issue tracker](https://github.com/rapidsai/raft/issues): Report issues or request features.
+
+## Overview
+
RAFT contains fundamental widely-used algorithms and primitives for data science and machine learning. The algorithms are CUDA-accelerated and form building-blocks for rapidly composing analytics.
By taking a primitives-based approach to algorithm development, RAFT
@@ -24,7 +37,7 @@ While not exhaustive, the following general categories help summarize the accele
All of RAFT's C++ APIs can be accessed header-only and optional pre-compiled shared libraries can 1) speed up compile times and 2) enable the APIs to be used without CUDA-enabled compilers.
In addition to the C++ library, RAFT also provides 2 Python libraries:
-- `pylibraft` - lightweight low-level Python wrappers around RAFT's host-accessable APIs.
+- `pylibraft` - lightweight low-level Python wrappers around RAFT's host-accessible APIs.
- `raft-dask` - multi-node multi-GPU communicator infrastructure for building distributed algorithms on the GPU with Dask.
## Getting started
@@ -77,11 +90,73 @@ auto metric = raft::distance::DistanceType::L2SqrtExpanded;
raft::distance::pairwise_distance(handle, input.view(), input.view(), output.view(), metric);
```
+It's also possible to create `raft::device_mdspan` views to invoke the same API with raw pointers and shape information:
+
+```c++
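+#include <raft/core/handle.hpp>
+#include <raft/core/device_mdspan.hpp>
+#include <raft/random/make_blobs.cuh>
+#include <raft/distance/distance.cuh>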
+
+raft::handle_t handle;
+
+int n_samples = 5000;
+int n_features = 50;
+
+float *input;
+int *labels;
+float *output;
+
+...
+// Allocate input, labels, and output pointers
+...
+
+auto input_view = raft::make_device_matrix_view(input, n_samples, n_features);
+auto labels_view = raft::make_device_vector_view(labels, n_samples);
+auto output_view = raft::make_device_matrix_view(output, n_samples, n_samples);
+
+raft::random::make_blobs(handle, input_view, labels_view);
+
+auto metric = raft::distance::DistanceType::L2SqrtExpanded;
+raft::distance::pairwise_distance(handle, input_view, input_view, output_view, metric);
+```
+
+
### Python Example
The `pylibraft` package contains a Python API for RAFT algorithms and primitives. `pylibraft` integrates nicely into other libraries by being very lightweight with minimal dependencies and accepting any object that supports the `__cuda_array_interface__`, such as [CuPy's ndarray](https://docs.cupy.dev/en/stable/user_guide/interoperability.html#rmm). The number of RAFT algorithms exposed in this package is continuing to grow from release to release.
-The example below demonstrates computing the pairwise Euclidean distances between CuPy arrays. `pylibraft` is a low-level API that prioritizes efficiency and simplicity over being pythonic, which is shown here by pre-allocating the output memory before invoking the `pairwise_distance` function. Note that CuPy is not a required dependency for `pylibraft`.
+The example below demonstrates computing the pairwise Euclidean distances between CuPy arrays. Note that CuPy is not a required dependency for `pylibraft`.
+
+```python
+import cupy as cp
+
+from pylibraft.distance import pairwise_distance
+
+n_samples = 5000
+n_features = 50
+
+in1 = cp.random.random_sample((n_samples, n_features), dtype=cp.float32)
+in2 = cp.random.random_sample((n_samples, n_features), dtype=cp.float32)
+
+output = pairwise_distance(in1, in2, metric="euclidean")
+```
+
+The `output` array supports [`__cuda_array_interface__`](https://numba.pydata.org/numba-doc/dev/cuda/cuda_array_interface.html#cuda-array-interface-version-2) so it is interoperable with other libraries like CuPy, Numba, and PyTorch that also support it.
+
+Below is an example of converting the output `pylibraft.device_ndarray` to a CuPy array:
+```python
+cupy_array = cp.asarray(output)
+```
+
+And converting to a PyTorch tensor:
+```python
+import torch
+
+torch_tensor = torch.as_tensor(output, device='cuda')
+```
+
+`pylibraft` also supports writing to a pre-allocated output array so any `__cuda_array_interface__` supported array can be written to in-place:
```python
import cupy as cp
@@ -95,12 +170,13 @@ in1 = cp.random.random_sample((n_samples, n_features), dtype=cp.float32)
in2 = cp.random.random_sample((n_samples, n_features), dtype=cp.float32)
output = cp.empty((n_samples, n_samples), dtype=cp.float32)
-pairwise_distance(in1, in2, output, metric="euclidean")
+pairwise_distance(in1, in2, out=output, metric="euclidean")
```
+
## Installing
-RAFT itself can be installed through conda, [Cmake Package Manager (CPM)](https://github.com/cpm-cmake/CPM.cmake), or by building the repository from source. Please refer to the [build instructions](docs/source/build.md) for more a comprehensive guide on building RAFT and using it in downstream projects.
+RAFT itself can be installed through conda, [Cmake Package Manager (CPM)](https://github.com/cpm-cmake/CPM.cmake), pip, or by building the repository from source. Please refer to the [build instructions](docs/source/build.md) for a more comprehensive guide on building RAFT and using it in downstream projects.
### Conda
@@ -120,6 +196,14 @@ You can also install the `libraft-*` conda packages individually using the `mamb
After installing RAFT, `find_package(raft COMPONENTS nn distance)` can be used in your CUDA/C++ cmake build to compile and/or link against needed dependencies in your raft target. `COMPONENTS` are optional and will depend on the packages installed.
+### Pip
+
+pylibraft and raft-dask both have experimental packages that can be [installed through pip](https://rapids.ai/pip.html#install):
+```bash
+pip install pylibraft-cu11 --extra-index-url=https://pypi.ngc.nvidia.com
+pip install raft-dask-cu11 --extra-index-url=https://pypi.ngc.nvidia.com
+```
+
### Cmake & CPM
RAFT uses the [RAPIDS-CMake](https://github.com/rapidsai/rapids-cmake) library, which makes it simple to include in downstream cmake projects. RAPIDS CMake provides a convenience layer around CPM.
@@ -229,7 +313,7 @@ The folder structure mirrors other RAPIDS repos, with the following folders:
## Contributing
-If you are interested in contributing to the RAFT project, please read our [Contributing guidelines](CONTRIBUTING.md). Refer to the [Developer Guide](DEVELOPER_GUIDE.md) for details on the developer guidelines, workflows, and principals.
+If you are interested in contributing to the RAFT project, please read our [Contributing guidelines](docs/source/contributing.md). Refer to the [Developer Guide](docs/source/developer_guide.md) for details on the developer guidelines, workflows, and principles.
## References
diff --git a/build.sh b/build.sh
index b48465922a..0708c1b89e 100755
--- a/build.sh
+++ b/build.sh
@@ -18,7 +18,7 @@ ARGS=$*
# script, and that this script resides in the repo dir!
REPODIR=$(cd $(dirname $0); pwd)
-VALIDARGS="clean libraft pylibraft raft-dask docs tests bench clean -v -g --install --compile-libs --compile-nn --compile-dist --allgpuarch --no-nvtx --show_depr_warn -h --buildfaiss --minimal-deps"
+VALIDARGS="clean libraft pylibraft raft-dask docs tests bench clean -v -g -n --compile-libs --compile-nn --compile-dist --allgpuarch --no-nvtx --show_depr_warn -h --buildfaiss --minimal-deps"
HELP="$0 [ ...] [ ...] [--cmake-args=\"\"] [--cache-tool=] [--limit-tests=] [--limit-bench=]
where is:
clean - remove all existing build artifacts and configuration (start over)
@@ -33,6 +33,7 @@ HELP="$0 [ ...] [ ...] [--cmake-args=\"\"] [--cache-tool= is:
-v - verbose build mode
-g - build for debug
+ -n - no install step
--compile-libs - compile shared libraries for all components
--compile-nn - compile shared library for nn component
--compile-dist - compile shared library for distance and current random components
@@ -44,7 +45,6 @@ HELP="$0 [ ...] [ ...] [--cmake-args=\"\"] [--cache-tool=\\\" - pass arbitrary list of CMake configuration options (escape all quotes in argument)
@@ -65,12 +65,14 @@ CMAKE_LOG_LEVEL=""
VERBOSE_FLAG=""
BUILD_ALL_GPU_ARCH=0
BUILD_TESTS=OFF
+BUILD_TYPE=Release
BUILD_BENCH=OFF
BUILD_STATIC_FAISS=OFF
COMPILE_LIBRARIES=OFF
COMPILE_NN_LIBRARY=OFF
COMPILE_DIST_LIBRARY=OFF
ENABLE_NN_DEPENDENCIES=OFF
+INSTALL_TARGET=install
TEST_TARGETS="CLUSTER_TEST;CORE_TEST;DISTANCE_TEST;LABEL_TEST;LINALG_TEST;MATRIX_TEST;RANDOM_TEST;SOLVERS_TEST;SPARSE_TEST;SPARSE_DIST_TEST;SPARSE_NEIGHBORS_TEST;NEIGHBORS_TEST;STATS_TEST;UTILS_TEST"
BENCH_TARGETS="CLUSTER_BENCH;NEIGHBORS_BENCH;DISTANCE_BENCH;LINALG_BENCH;MATRIX_BENCH;SPARSE_BENCH;RANDOM_BENCH"
@@ -82,7 +84,6 @@ CLEAN=0
UNINSTALL=0
DISABLE_DEPRECATION_WARNINGS=ON
CMAKE_TARGET=""
-INSTALL_TARGET=""
# Set defaults for vars that may not have been defined externally
# FIXME: if INSTALL_PREFIX is not set, check PREFIX, then check
@@ -190,8 +191,8 @@ if (( ${NUMARGS} != 0 )); then
fi
# Process flags
-if hasArg --install; then
- INSTALL_TARGET="install"
+if hasArg -n; then
+ INSTALL_TARGET=""
fi
if hasArg --minimal-deps; then
@@ -336,6 +337,7 @@ if (( ${NUMARGS} == 0 )) || hasArg libraft || hasArg docs || hasArg tests || has
cmake -S ${REPODIR}/cpp -B ${LIBRAFT_BUILD_DIR} \
-DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \
-DCMAKE_CUDA_ARCHITECTURES=${RAFT_CMAKE_CUDA_ARCHITECTURES} \
+ -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
-DRAFT_COMPILE_LIBRARIES=${COMPILE_LIBRARIES} \
-DRAFT_ENABLE_NN_DEPENDENCIES=${ENABLE_NN_DEPENDENCIES} \
-DRAFT_NVTX=${NVTX} \
diff --git a/ci/checks/black_lists.sh b/ci/checks/black_lists.sh
index 849b354d08..cf289c120c 100755
--- a/ci/checks/black_lists.sh
+++ b/ci/checks/black_lists.sh
@@ -4,7 +4,7 @@
# RAFT black listed function call Tester #
##########################################
-# PR_TARGET_BRANCH is set by the CI enviroment
+# PR_TARGET_BRANCH is set by the CI environment
git checkout --quiet $PR_TARGET_BRANCH
diff --git a/ci/checks/copyright.py b/ci/checks/copyright.py
index 6b808cc051..bfef5392f5 100644
--- a/ci/checks/copyright.py
+++ b/ci/checks/copyright.py
@@ -35,10 +35,9 @@
re.compile(r"CMakeLists[.]txt$"),
re.compile(r"CMakeLists_standalone[.]txt$"),
re.compile(r"setup[.]cfg$"),
- re.compile(r"[.]flake8[.]cython$"),
re.compile(r"meta[.]yaml$")
]
-ExemptFiles = []
+ExemptFiles = ["cpp/include/raft/spatial/knn/detail/warp_select_faiss.cuh"]
# this will break starting at year 10000, which is probably OK :)
CheckSimple = re.compile(
diff --git a/ci/checks/style.sh b/ci/checks/style.sh
index fb5a64fdac..f8fcbe19f8 100644
--- a/ci/checks/style.sh
+++ b/ci/checks/style.sh
@@ -12,69 +12,12 @@ PATH=/opt/conda/bin:$PATH
. /opt/conda/etc/profile.d/conda.sh
conda activate rapids
-# Run flake8 and get results/return code
-FLAKE=`flake8 --exclude=cpp,thirdparty,__init__.py,versioneer.py && flake8 --config=python/.flake8.cython`
-RETVAL=$?
+FORMAT_FILE_URL=https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-22.12/cmake-format-rapids-cmake.json
+export RAPIDS_CMAKE_FORMAT_FILE=/tmp/rapids_cmake_ci/cmake-formats-rapids-cmake.json
+mkdir -p $(dirname ${RAPIDS_CMAKE_FORMAT_FILE})
+wget -O ${RAPIDS_CMAKE_FORMAT_FILE} ${FORMAT_FILE_URL}
-# Output results if failure otherwise show pass
-if [ "$FLAKE" != "" ]; then
- echo -e "\n\n>>>> FAILED: flake8 style check; begin output\n\n"
- echo -e "$FLAKE"
- echo -e "\n\n>>>> FAILED: flake8 style check; end output\n\n"
-else
- echo -e "\n\n>>>> PASSED: flake8 style check\n\n"
-fi
-
-# Check for copyright headers in the files modified currently
-COPYRIGHT=`python ci/checks/copyright.py --git-modified-only 2>&1`
-CR_RETVAL=$?
-if [ "$RETVAL" = "0" ]; then
- RETVAL=$CR_RETVAL
-fi
-
-# Output results if failure otherwise show pass
-if [ "$CR_RETVAL" != "0" ]; then
- echo -e "\n\n>>>> FAILED: copyright check; begin output\n\n"
- echo -e "$COPYRIGHT"
- echo -e "\n\n>>>> FAILED: copyright check; end output\n\n"
-else
- echo -e "\n\n>>>> PASSED: copyright check\n\n"
-fi
-
-# Check for a consistent #include syntax
-HASH_INCLUDE=`python cpp/scripts/include_checker.py \
- cpp/bench \
- cpp/include \
- cpp/test \
- 2>&1`
-HASH_RETVAL=$?
-if [ "$RETVAL" = "0" ]; then
- RETVAL=$HASH_RETVAL
-fi
-
-# Output results if failure otherwise show pass
-if [ "$HASH_RETVAL" != "0" ]; then
- echo -e "\n\n>>>> FAILED: #include check; begin output\n\n"
- echo -e "$HASH_INCLUDE"
- echo -e "\n\n>>>> FAILED: #include check; end output\n\n"
-else
- echo -e "\n\n>>>> PASSED: #include check\n\n"
-fi
-
-# Check for a consistent code format
-FORMAT=`python cpp/scripts/run-clang-format.py 2>&1`
-FORMAT_RETVAL=$?
-if [ "$RETVAL" = "0" ]; then
- RETVAL=$FORMAT_RETVAL
-fi
-
-# Output results if failure otherwise show pass
-if [ "$FORMAT_RETVAL" != "0" ]; then
- echo -e "\n\n>>>> FAILED: clang format check; begin output\n\n"
- echo -e "$FORMAT"
- echo -e "\n\n>>>> FAILED: clang format check; end output\n\n"
-else
- echo -e "\n\n>>>> PASSED: clang format check\n\n"
-fi
+# Run pre-commit checks
+pre-commit run --hook-stage manual --all-files
exit $RETVAL
diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh
index 98cb46064c..3162802cbc 100644
--- a/ci/gpu/build.sh
+++ b/ci/gpu/build.sh
@@ -123,5 +123,5 @@ pytest --cache-clear --junitxml="$WORKSPACE/junit-raft-dask.xml" -v -s
if [ "$(arch)" = "x86_64" ]; then
gpuci_logger "Building docs"
gpuci_mamba_retry install "rapids-doc-env=${MINOR_VERSION}.*"
- "$WORKSPACE/build.sh" docs -v
+ "$WORKSPACE/build.sh" docs -v -n
fi
diff --git a/conda/environments/raft_dev_cuda11.2.yml b/conda/environments/raft_dev_cuda11.2.yml
index caff6996e8..afb2657356 100644
--- a/conda/environments/raft_dev_cuda11.2.yml
+++ b/conda/environments/raft_dev_cuda11.2.yml
@@ -14,7 +14,7 @@ dependencies:
- clang=11.1.0
- clang-tools=11.1.0
- cython>=0.29,<0.30
-- cmake>=3.23.1
+- cmake>=3.23.1,!=3.25.0
- dask>=2022.9.2
- distributed>=2022.9.2
- scikit-build>=0.13.1
diff --git a/conda/environments/raft_dev_cuda11.4.yml b/conda/environments/raft_dev_cuda11.4.yml
index f6b91e0825..54b3f48fb0 100644
--- a/conda/environments/raft_dev_cuda11.4.yml
+++ b/conda/environments/raft_dev_cuda11.4.yml
@@ -14,7 +14,7 @@ dependencies:
- clang=11.1.0
- clang-tools=11.1.0
- cython>=0.29,<0.30
-- cmake>=3.23.1
+- cmake>=3.23.1,!=3.25.0
- dask>=2022.9.2
- distributed>=2022.9.2
- scikit-build>=0.13.1
diff --git a/conda/environments/raft_dev_cuda11.5.yml b/conda/environments/raft_dev_cuda11.5.yml
index 66f6511d6f..6555e5cc83 100644
--- a/conda/environments/raft_dev_cuda11.5.yml
+++ b/conda/environments/raft_dev_cuda11.5.yml
@@ -14,7 +14,7 @@ dependencies:
- clang=11.1.0
- clang-tools=11.1.0
- cython>=0.29,<0.30
-- cmake>=3.23.1
+- cmake>=3.23.1,!=3.25.0
- dask>=2022.9.2
- distributed>=2022.9.2
- scikit-build>=0.13.1
diff --git a/conda/recipes/libraft/build_libraft_distance.sh b/conda/recipes/libraft/build_libraft_distance.sh
index 35a669d6df..35bf354e9b 100644
--- a/conda/recipes/libraft/build_libraft_distance.sh
+++ b/conda/recipes/libraft/build_libraft_distance.sh
@@ -1,4 +1,4 @@
#!/usr/bin/env bash
# Copyright (c) 2022, NVIDIA CORPORATION.
-./build.sh libraft --install -v --allgpuarch --compile-dist --no-nvtx
+./build.sh libraft -v --allgpuarch --compile-dist --no-nvtx
diff --git a/conda/recipes/libraft/build_libraft_headers.sh b/conda/recipes/libraft/build_libraft_headers.sh
index 02ef674787..7bd678c07a 100644
--- a/conda/recipes/libraft/build_libraft_headers.sh
+++ b/conda/recipes/libraft/build_libraft_headers.sh
@@ -1,4 +1,4 @@
#!/usr/bin/env bash
# Copyright (c) 2022, NVIDIA CORPORATION.
-./build.sh libraft --install -v --allgpuarch --no-nvtx
+./build.sh libraft -v --allgpuarch --no-nvtx
diff --git a/conda/recipes/libraft/build_libraft_nn.sh b/conda/recipes/libraft/build_libraft_nn.sh
index caa643a356..773d6ab02e 100644
--- a/conda/recipes/libraft/build_libraft_nn.sh
+++ b/conda/recipes/libraft/build_libraft_nn.sh
@@ -1,4 +1,4 @@
#!/usr/bin/env bash
# Copyright (c) 2022, NVIDIA CORPORATION.
-./build.sh libraft --install -v --allgpuarch --compile-nn --no-nvtx
+./build.sh libraft -v --allgpuarch --compile-nn --no-nvtx
diff --git a/conda/recipes/libraft/conda_build_config.yaml b/conda/recipes/libraft/conda_build_config.yaml
index c4d0c2a087..fc77dfc89b 100644
--- a/conda/recipes/libraft/conda_build_config.yaml
+++ b/conda/recipes/libraft/conda_build_config.yaml
@@ -11,7 +11,7 @@ sysroot_version:
- "2.17"
cmake_version:
- - ">=3.23.1"
+ - ">=3.23.1,!=3.25.0"
nccl_version:
- ">=2.9.9"
diff --git a/conda/recipes/pylibraft/build.sh b/conda/recipes/pylibraft/build.sh
index 4e64d031ec..2f02fb5a4c 100644
--- a/conda/recipes/pylibraft/build.sh
+++ b/conda/recipes/pylibraft/build.sh
@@ -2,4 +2,4 @@
#!/usr/bin/env bash
# This assumes the script is executed from the root of the repo directory
-./build.sh pylibraft --install --no-nvtx
+./build.sh pylibraft --no-nvtx
diff --git a/conda/recipes/pylibraft/conda_build_config.yaml b/conda/recipes/pylibraft/conda_build_config.yaml
index 725c38cb6a..f16406336b 100644
--- a/conda/recipes/pylibraft/conda_build_config.yaml
+++ b/conda/recipes/pylibraft/conda_build_config.yaml
@@ -11,4 +11,4 @@ sysroot_version:
- "2.17"
cmake_version:
- - ">=3.23.1"
+ - ">=3.23.1,!=3.25.0"
diff --git a/conda/recipes/raft-dask/build.sh b/conda/recipes/raft-dask/build.sh
index 963433dd8d..ec81224e03 100644
--- a/conda/recipes/raft-dask/build.sh
+++ b/conda/recipes/raft-dask/build.sh
@@ -1,6 +1,5 @@
# Copyright (c) 2022, NVIDIA CORPORATION.
#!/usr/bin/env bash
-# Copyright (c) 2022, NVIDIA CORPORATION.
# This assumes the script is executed from the root of the repo directory
-./build.sh raft-dask --install --no-nvtx
+./build.sh raft-dask --no-nvtx
diff --git a/conda/recipes/raft-dask/conda_build_config.yaml b/conda/recipes/raft-dask/conda_build_config.yaml
index a6ca533504..3b42dab182 100644
--- a/conda/recipes/raft-dask/conda_build_config.yaml
+++ b/conda/recipes/raft-dask/conda_build_config.yaml
@@ -14,4 +14,4 @@ ucx_version:
- "1.13.0"
cmake_version:
- - ">=3.23.1"
+ - ">=3.23.1,!=3.25.0"
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index fff2148e7e..8a006f01df 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -1,18 +1,15 @@
-#=============================================================================
+# =============================================================================
# Copyright (c) 2020-2022, NVIDIA CORPORATION.
#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#=============================================================================
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
set(RAPIDS_VERSION "23.02")
set(RAFT_VERSION "23.02.00")
@@ -26,18 +23,17 @@ include(rapids-find)
rapids_cuda_init_architectures(RAFT)
-project(RAFT VERSION ${RAFT_VERSION} LANGUAGES CXX CUDA)
-
-# Needed because GoogleBenchmark changes the state of FindThreads.cmake, causing subsequent runs to
-# have different values for the `Threads::Threads` target. Setting this flag ensures
-# `Threads::Threads` is the same value in first run and subsequent runs.
-set(THREADS_PREFER_PTHREAD_FLAG ON)
+project(
+ RAFT
+ VERSION ${RAFT_VERSION}
+ LANGUAGES CXX CUDA
+)
# Write the version header
rapids_cmake_write_version_file(include/raft/version_config.hpp)
-##############################################################################
-# - build type ---------------------------------------------------------------
+# ##################################################################################################
+# * build type ---------------------------------------------------------------
# Set a default build type if none was specified
rapids_cmake_build_type(Release)
@@ -45,14 +41,16 @@ rapids_cmake_build_type(Release)
# this is needed for clang-tidy runs
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
-##############################################################################
-# - User Options ------------------------------------------------------------
+# ##################################################################################################
+# * User Options ------------------------------------------------------------
option(BUILD_SHARED_LIBS "Build raft shared libraries" ON)
option(BUILD_TESTS "Build raft unit-tests" ON)
option(BUILD_BENCH "Build raft C++ benchmark tests" OFF)
option(CUDA_ENABLE_KERNELINFO "Enable kernel resource usage info" OFF)
-option(CUDA_ENABLE_LINEINFO "Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler)" OFF)
+option(CUDA_ENABLE_LINEINFO
+ "Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler)" OFF
+)
option(CUDA_STATIC_RUNTIME "Statically link the CUDA toolkit runtime and libraries" OFF)
option(DETECT_CONDA_ENV "Enable detection of conda environment for dependencies" ON)
option(DISABLE_DEPRECATION_WARNINGS "Disable deprecaction warnings " ON)
@@ -60,12 +58,26 @@ option(DISABLE_OPENMP "Disable OpenMP" OFF)
option(RAFT_NVTX "Enable nvtx markers" OFF)
option(RAFT_COMPILE_LIBRARIES "Enable building raft shared library instantiations" ${BUILD_TESTS})
-option(RAFT_COMPILE_NN_LIBRARY "Enable building raft nearest neighbors shared library instantiations" ${RAFT_COMPILE_LIBRARIES})
-option(RAFT_COMPILE_DIST_LIBRARY "Enable building raft distant shared library instantiations" ${RAFT_COMPILE_LIBRARIES})
-option(RAFT_ENABLE_NN_DEPENDENCIES "Search for raft::nn dependencies like faiss" ${RAFT_COMPILE_LIBRARIES})
+option(
+ RAFT_COMPILE_NN_LIBRARY "Enable building raft nearest neighbors shared library instantiations"
+ ${RAFT_COMPILE_LIBRARIES}
+)
+option(RAFT_COMPILE_DIST_LIBRARY "Enable building raft distant shared library instantiations"
+ ${RAFT_COMPILE_LIBRARIES}
+)
+option(RAFT_ENABLE_NN_DEPENDENCIES "Search for raft::nn dependencies like faiss"
+ ${RAFT_COMPILE_LIBRARIES}
+)
option(RAFT_ENABLE_thrust_DEPENDENCY "Enable Thrust dependency" ON)
+if(BUILD_TESTS OR BUILD_BENCH)
+ # Needed because GoogleBenchmark changes the state of FindThreads.cmake, causing subsequent runs
+ # to have different values for the `Threads::Threads` target. Setting this flag ensures
+ # `Threads::Threads` is the same value in first run and subsequent runs.
+ set(THREADS_PREFER_PTHREAD_FLAG ON)
+endif()
+
if(BUILD_TESTS AND NOT RAFT_ENABLE_thrust_DEPENDENCY)
message(VERBOSE "RAFT: BUILD_TESTS is enabled, overriding RAFT_ENABLE_thrust_DEPENDENCY")
set(RAFT_ENABLE_thrust_DEPENDENCY ON)
@@ -74,7 +86,13 @@ endif()
option(RAFT_EXCLUDE_FAISS_FROM_ALL "Exclude FAISS targets from RAFT's 'all' target" ON)
include(CMakeDependentOption)
-cmake_dependent_option(RAFT_USE_FAISS_STATIC "Build and statically link the FAISS library for nearest neighbors search on GPU" ON RAFT_COMPILE_LIBRARIES OFF)
+cmake_dependent_option(
+ RAFT_USE_FAISS_STATIC
+ "Build and statically link the FAISS library for nearest neighbors search on GPU"
+ ON
+ RAFT_COMPILE_LIBRARIES
+ OFF
+)
message(VERBOSE "RAFT: Building optional components: ${raft_FIND_COMPONENTS}")
message(VERBOSE "RAFT: Build RAFT unit-tests: ${BUILD_TESTS}")
@@ -85,43 +103,45 @@ message(VERBOSE "RAFT: Disable OpenMP: ${DISABLE_OPENMP}")
message(VERBOSE "RAFT: Enable kernel resource usage info: ${CUDA_ENABLE_KERNELINFO}")
message(VERBOSE "RAFT: Enable lineinfo in nvcc: ${CUDA_ENABLE_LINEINFO}")
message(VERBOSE "RAFT: Enable nvtx markers: ${RAFT_NVTX}")
-message(VERBOSE "RAFT: Statically link the CUDA toolkit runtime and libraries: ${CUDA_STATIC_RUNTIME}")
+message(VERBOSE
+ "RAFT: Statically link the CUDA toolkit runtime and libraries: ${CUDA_STATIC_RUNTIME}"
+)
# Set RMM logging level
-set(RMM_LOGGING_LEVEL "INFO" CACHE STRING "Choose the logging level.")
-set_property(CACHE RMM_LOGGING_LEVEL PROPERTY STRINGS "TRACE" "DEBUG" "INFO" "WARN" "ERROR" "CRITICAL" "OFF")
+set(RMM_LOGGING_LEVEL
+ "INFO"
+ CACHE STRING "Choose the logging level."
+)
+set_property(
+ CACHE RMM_LOGGING_LEVEL PROPERTY STRINGS "TRACE" "DEBUG" "INFO" "WARN" "ERROR" "CRITICAL" "OFF"
+)
message(VERBOSE "RAFT: RMM_LOGGING_LEVEL = '${RMM_LOGGING_LEVEL}'.")
-##############################################################################
-# - Conda environment detection ----------------------------------------------
+# ##################################################################################################
+# * Conda environment detection ----------------------------------------------
if(DETECT_CONDA_ENV)
- rapids_cmake_support_conda_env( conda_env MODIFY_PREFIX_PATH )
- if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT AND DEFINED ENV{CONDA_PREFIX})
- message(STATUS "RAFT: No CMAKE_INSTALL_PREFIX argument detected, setting to: $ENV{CONDA_PREFIX}")
+ rapids_cmake_support_conda_env(conda_env MODIFY_PREFIX_PATH)
+ if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT AND DEFINED ENV{CONDA_PREFIX})
+ message(
+ STATUS "RAFT: No CMAKE_INSTALL_PREFIX argument detected, setting to: $ENV{CONDA_PREFIX}"
+ )
set(CMAKE_INSTALL_PREFIX "$ENV{CONDA_PREFIX}")
endif()
endif()
-##############################################################################
-# - compiler options ---------------------------------------------------------
+# ##################################################################################################
+# * compiler options ---------------------------------------------------------
set(_ctk_static_suffix "")
if(CUDA_STATIC_RUNTIME)
- # If we're statically linking CTK cuBLAS,
- # we also want to statically link BLAS
- set(BLA_STATIC ON)
set(_ctk_static_suffix "_static")
- # Control legacy FindCUDA.cmake behavior too
- # Remove this after we push it into rapids-cmake:
- # https://github.com/rapidsai/rapids-cmake/pull/259
- set(CUDA_USE_STATIC_CUDA_RUNTIME ON)
endif()
# CUDA runtime
rapids_cuda_init_runtime(USE_STATIC ${CUDA_STATIC_RUNTIME})
-if (NOT DISABLE_OPENMP)
+if(NOT DISABLE_OPENMP)
find_package(OpenMP)
if(OPENMP_FOUND)
message(VERBOSE "RAFT: OpenMP found in ${OpenMP_CXX_INCLUDE_DIRS}")
@@ -132,13 +152,15 @@ endif()
# * determine GPU architectures
# * enable the CMake CUDA language
# * set other CUDA compilation flags
-rapids_find_package(CUDAToolkit REQUIRED
- BUILD_EXPORT_SET raft-exports
- INSTALL_EXPORT_SET raft-exports)
+rapids_find_package(
+ CUDAToolkit REQUIRED
+ BUILD_EXPORT_SET raft-exports
+ INSTALL_EXPORT_SET raft-exports
+)
include(cmake/modules/ConfigureCUDA.cmake)
-##############################################################################
-# - Requirements -------------------------------------------------------------
+# ##################################################################################################
+# * Requirements -------------------------------------------------------------
if(RAFT_COMPILE_LIBRARIES)
set(RAFT_COMPILE_DIST_LIBRARY ON)
@@ -156,10 +178,13 @@ rapids_cpm_init()
include(cmake/thirdparty/get_thrust.cmake)
include(cmake/thirdparty/get_rmm.cmake)
include(cmake/thirdparty/get_faiss.cmake)
+include(cmake/thirdparty/get_cutlass.cmake)
if(RAFT_ENABLE_cuco_DEPENDENCY)
include(${rapids-cmake-dir}/cpm/cuco.cmake)
- rapids_cpm_cuco(BUILD_EXPORT_SET raft-distance-lib-exports INSTALL_EXPORT_SET raft-distance-lib-exports)
+ rapids_cpm_cuco(
+ BUILD_EXPORT_SET raft-distance-lib-exports INSTALL_EXPORT_SET raft-distance-lib-exports
+ )
endif()
if(BUILD_TESTS)
@@ -171,69 +196,77 @@ if(BUILD_BENCH)
rapids_cpm_gbench()
endif()
-##############################################################################
-# - raft ---------------------------------------------------------------------
+# ##################################################################################################
+# * raft ---------------------------------------------------------------------
add_library(raft INTERFACE)
add_library(raft::raft ALIAS raft)
-target_include_directories(raft INTERFACE
- "$"
- "$")
-
-# Keep RAFT as lightweight as possible.
-# Only CUDA libs and rmm should
-# be used in global target.
-target_link_libraries(raft INTERFACE
- rmm::rmm
- CUDA::cublas${_ctk_static_suffix}
- CUDA::curand${_ctk_static_suffix}
- CUDA::cusolver${_ctk_static_suffix}
- CUDA::cusparse${_ctk_static_suffix}
- $<$:raft::Thrust>
+target_include_directories(
+ raft INTERFACE "$" "$"
+)
+
+# Keep RAFT as lightweight as possible. Only CUDA libs and rmm should be used in global target.
+target_link_libraries(
+ raft
+ INTERFACE rmm::rmm
+ CUDA::cublas${_ctk_static_suffix}
+ CUDA::curand${_ctk_static_suffix}
+ CUDA::cusolver${_ctk_static_suffix}
+ CUDA::cusparse${_ctk_static_suffix}
+ $<$:raft::Thrust>
)
target_compile_features(raft INTERFACE cxx_std_17 $)
if(RAFT_COMPILE_DIST_LIBRARY OR RAFT_COMPILE_NN_LIBRARY)
- file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld"
- [=[
+ file(
+ WRITE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld"
+ [=[
SECTIONS
{
.nvFatBinSegment : { *(.nvFatBinSegment) }
.nv_fatbin : { *(.nv_fatbin) }
}
-]=])
+]=]
+ )
endif()
-##############################################################################
-# - NVTX support in raft -----------------------------------------------------
+# ##################################################################################################
+# * NVTX support in raft -----------------------------------------------------
-if (RAFT_NVTX)
+if(RAFT_NVTX)
# This enables NVTX within the project with no option to disable it downstream.
target_link_libraries(raft INTERFACE CUDA::nvToolsExt)
target_compile_definitions(raft INTERFACE NVTX_ENABLED)
else()
- # Allow enable NVTX downstream if not set here.
- # This creates a new option at build/install time, which is set by default to OFF,
- # but can be enabled in the dependent project.
- get_property(nvtx_option_help_string CACHE RAFT_NVTX PROPERTY HELPSTRING)
- string(CONCAT nvtx_export_string
- "option(RAFT_NVTX \"" ${nvtx_option_help_string} "\" OFF)"
- [=[
+ # Allow enabling NVTX downstream if not set here. This creates a new option at build/install
+ # time, which is set by default to OFF, but can be enabled in the dependent project.
+ get_property(
+ nvtx_option_help_string
+ CACHE RAFT_NVTX
+ PROPERTY HELPSTRING
+ )
+ string(
+ CONCAT
+ nvtx_export_string
+ "option(RAFT_NVTX \""
+ ${nvtx_option_help_string}
+ "\" OFF)"
+ [=[
target_link_libraries(raft::raft INTERFACE $<$:CUDA::nvToolsExt>)
target_compile_definitions(raft::raft INTERFACE $<$:NVTX_ENABLED>)
- ]=])
+ ]=]
+ )
endif()
-##############################################################################
-# - raft_distance ------------------------------------------------------------
-# TODO:
-# Currently, this package also contains the 'random' namespace (for rmat logic)
-# We couldn't get this to work properly due to strange CI failures as noticed
-# in the PR#778. In the long term, we should rename this package to `raft_compiled`
-# in order to have a single pre-compiled raft package for those who need it.
+# ##################################################################################################
+# * raft_distance ------------------------------------------------------------
+# TODO: Currently, this package also contains the 'random' namespace (for rmat logic). We couldn't
+# get this to work properly due to strange CI failures, as noticed in PR #778. In the long term, we
+# should rename this package to `raft_compiled` in order to have a single pre-compiled raft package
+# for those who need it.
add_library(raft_distance INTERFACE)
if(TARGET raft_distance AND (NOT TARGET raft::distance))
@@ -243,11 +276,14 @@ endif()
set_target_properties(raft_distance PROPERTIES EXPORT_NAME distance)
if(RAFT_COMPILE_DIST_LIBRARY)
- add_library(raft_distance_lib
+ add_library(
+ raft_distance_lib
src/distance/pairwise_distance.cu
src/distance/fused_l2_min_arg.cu
src/distance/update_centroids_float.cu
src/distance/update_centroids_double.cu
+ src/distance/cluster_cost_float.cu
+ src/distance/cluster_cost_double.cu
src/distance/specializations/detail/canberra.cu
src/distance/specializations/detail/chebyshev.cu
src/distance/specializations/detail/correlation.cu
@@ -262,9 +298,9 @@ if(RAFT_COMPILE_DIST_LIBRARY)
src/distance/specializations/detail/kernels/gram_matrix_base_float.cu
src/distance/specializations/detail/kernels/polynomial_kernel_double_int.cu
src/distance/specializations/detail/kernels/polynomial_kernel_float_int.cu
-# These are somehow missing a kernel definition which is causing a compile error.
-# src/distance/specializations/detail/kernels/rbf_kernel_double.cu
-# src/distance/specializations/detail/kernels/rbf_kernel_float.cu
+ # These are somehow missing a kernel definition which is causing a compile error.
+ # src/distance/specializations/detail/kernels/rbf_kernel_double.cu
+ # src/distance/specializations/detail/kernels/rbf_kernel_float.cu
src/distance/specializations/detail/kernels/tanh_kernel_double.cu
src/distance/specializations/detail/kernels/tanh_kernel_float.cu
src/distance/specializations/detail/kl_divergence_float_float_float_int.cu
@@ -295,6 +331,21 @@ if(RAFT_COMPILE_DIST_LIBRARY)
src/distance/specializations/fused_l2_nn_double_int64.cu
src/distance/specializations/fused_l2_nn_float_int.cu
src/distance/specializations/fused_l2_nn_float_int64.cu
+ src/nn/specializations/detail/ivfpq_build.cu
+ src/nn/specializations/detail/ivfpq_compute_similarity_float_fast.cu
+ src/nn/specializations/detail/ivfpq_compute_similarity_float_no_basediff.cu
+ src/nn/specializations/detail/ivfpq_compute_similarity_float_no_smem_lut.cu
+ src/nn/specializations/detail/ivfpq_compute_similarity_fp8s_fast.cu
+ src/nn/specializations/detail/ivfpq_compute_similarity_fp8s_no_basediff.cu
+ src/nn/specializations/detail/ivfpq_compute_similarity_fp8s_no_smem_lut.cu
+ src/nn/specializations/detail/ivfpq_compute_similarity_fp8u_fast.cu
+ src/nn/specializations/detail/ivfpq_compute_similarity_fp8u_no_basediff.cu
+ src/nn/specializations/detail/ivfpq_compute_similarity_fp8u_no_smem_lut.cu
+ src/nn/specializations/detail/ivfpq_compute_similarity_half_fast.cu
+ src/nn/specializations/detail/ivfpq_compute_similarity_half_no_basediff.cu
+ src/nn/specializations/detail/ivfpq_compute_similarity_half_no_smem_lut.cu
+ src/nn/specializations/detail/ivfpq_search.cu
+ src/nn/specializations/detail/ivfpq_search_float_uint64_t.cu
src/random/specializations/rmat_rectangular_generator_int_double.cu
src/random/specializations/rmat_rectangular_generator_int64_double.cu
src/random/specializations/rmat_rectangular_generator_int_float.cu
@@ -302,26 +353,27 @@ if(RAFT_COMPILE_DIST_LIBRARY)
)
set_target_properties(
raft_distance_lib
- PROPERTIES OUTPUT_NAME raft_distance
- BUILD_RPATH "\$ORIGIN"
- INSTALL_RPATH "\$ORIGIN"
- CXX_STANDARD 17
- CXX_STANDARD_REQUIRED ON
- CUDA_STANDARD 17
- CUDA_STANDARD_REQUIRED ON
- POSITION_INDEPENDENT_CODE ON
- INTERFACE_POSITION_INDEPENDENT_CODE ON)
-
- target_link_libraries(raft_distance_lib
- PUBLIC raft::raft
- cuco::cuco
- )
- target_compile_options(raft_distance_lib
- PRIVATE "$<$:${RAFT_CXX_FLAGS}>"
- "$<$:${RAFT_CUDA_FLAGS}>"
- )
- target_compile_definitions(raft_distance_lib
- INTERFACE "RAFT_DISTANCE_COMPILED")
+ PROPERTIES OUTPUT_NAME raft_distance
+ BUILD_RPATH "\$ORIGIN"
+ INSTALL_RPATH "\$ORIGIN"
+ CXX_STANDARD 17
+ CXX_STANDARD_REQUIRED ON
+ CUDA_STANDARD 17
+ CUDA_STANDARD_REQUIRED ON
+ POSITION_INDEPENDENT_CODE ON
+ INTERFACE_POSITION_INDEPENDENT_CODE ON
+ )
+
+ target_link_libraries(
+ raft_distance_lib
+ PUBLIC raft::raft cuco::cuco
+ PRIVATE nvidia::cutlass::cutlass
+ )
+ target_compile_options(
+ raft_distance_lib PRIVATE "$<$:${RAFT_CXX_FLAGS}>"
+ "$<$:${RAFT_CUDA_FLAGS}>"
+ )
+ target_compile_definitions(raft_distance_lib INTERFACE "RAFT_DISTANCE_COMPILED")
# ensure CUDA symbols aren't relocated to the middle of the debug build binaries
target_link_options(raft_distance_lib PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld")
@@ -332,13 +384,13 @@ if(TARGET raft_distance_lib AND (NOT TARGET raft::raft_distance_lib))
add_library(raft::raft_distance_lib ALIAS raft_distance_lib)
endif()
-target_link_libraries(raft_distance INTERFACE
- raft::raft
- $
+target_link_libraries(
+ raft_distance INTERFACE raft::raft $
+ nvidia::cutlass::cutlass
)
-##############################################################################
-# - raft_nn ------------------------------------------------------------------
+# ##################################################################################################
+# * raft_nn ------------------------------------------------------------------
add_library(raft_nn INTERFACE)
if(TARGET raft_nn AND (NOT TARGET raft::nn))
@@ -348,126 +400,170 @@ endif()
set_target_properties(raft_nn PROPERTIES EXPORT_NAME nn)
if(RAFT_COMPILE_NN_LIBRARY)
- add_library(raft_nn_lib
- src/nn/specializations/ball_cover.cu
- src/nn/specializations/detail/ball_cover_lowdim_pass_one_2d.cu
- src/nn/specializations/detail/ball_cover_lowdim_pass_two_2d.cu
- src/nn/specializations/detail/ball_cover_lowdim_pass_one_3d.cu
- src/nn/specializations/detail/ball_cover_lowdim_pass_two_3d.cu
- src/nn/specializations/detail/ivfpq_compute_similarity_float_fast.cu
- src/nn/specializations/detail/ivfpq_compute_similarity_float_no_basediff.cu
- src/nn/specializations/detail/ivfpq_compute_similarity_float_no_smem_lut.cu
- src/nn/specializations/detail/ivfpq_compute_similarity_fp8s_fast.cu
- src/nn/specializations/detail/ivfpq_compute_similarity_fp8s_no_basediff.cu
- src/nn/specializations/detail/ivfpq_compute_similarity_fp8s_no_smem_lut.cu
- src/nn/specializations/detail/ivfpq_compute_similarity_fp8u_fast.cu
- src/nn/specializations/detail/ivfpq_compute_similarity_fp8u_no_basediff.cu
- src/nn/specializations/detail/ivfpq_compute_similarity_fp8u_no_smem_lut.cu
- src/nn/specializations/detail/ivfpq_compute_similarity_half_fast.cu
- src/nn/specializations/detail/ivfpq_compute_similarity_half_no_basediff.cu
- src/nn/specializations/detail/ivfpq_compute_similarity_half_no_smem_lut.cu
- src/nn/specializations/detail/ivfpq_search_float_int64_t.cu
- src/nn/specializations/detail/ivfpq_search_float_uint32_t.cu
- src/nn/specializations/detail/ivfpq_search_float_uint64_t.cu
- src/nn/specializations/fused_l2_knn_long_float_true.cu
- src/nn/specializations/fused_l2_knn_long_float_false.cu
- src/nn/specializations/fused_l2_knn_int_float_true.cu
- src/nn/specializations/fused_l2_knn_int_float_false.cu
- src/nn/specializations/knn.cu
- )
+ add_library(
+ raft_nn_lib
+ src/nn/specializations/ball_cover.cu
+ src/nn/specializations/detail/ball_cover_lowdim_pass_one_2d.cu
+ src/nn/specializations/detail/ball_cover_lowdim_pass_two_2d.cu
+ src/nn/specializations/detail/ball_cover_lowdim_pass_one_3d.cu
+ src/nn/specializations/detail/ball_cover_lowdim_pass_two_3d.cu
+ src/nn/specializations/detail/ivfpq_compute_similarity_float_fast.cu
+ src/nn/specializations/detail/ivfpq_compute_similarity_float_no_basediff.cu
+ src/nn/specializations/detail/ivfpq_compute_similarity_float_no_smem_lut.cu
+ src/nn/specializations/detail/ivfpq_compute_similarity_fp8s_fast.cu
+ src/nn/specializations/detail/ivfpq_compute_similarity_fp8s_no_basediff.cu
+ src/nn/specializations/detail/ivfpq_compute_similarity_fp8s_no_smem_lut.cu
+ src/nn/specializations/detail/ivfpq_compute_similarity_fp8u_fast.cu
+ src/nn/specializations/detail/ivfpq_compute_similarity_fp8u_no_basediff.cu
+ src/nn/specializations/detail/ivfpq_compute_similarity_fp8u_no_smem_lut.cu
+ src/nn/specializations/detail/ivfpq_compute_similarity_half_fast.cu
+ src/nn/specializations/detail/ivfpq_compute_similarity_half_no_basediff.cu
+ src/nn/specializations/detail/ivfpq_compute_similarity_half_no_smem_lut.cu
+ src/nn/specializations/detail/ivfpq_build.cu
+ src/nn/specializations/detail/ivfpq_search.cu
+ src/nn/specializations/detail/ivfpq_search_float_int64_t.cu
+ src/nn/specializations/detail/ivfpq_search_float_uint32_t.cu
+ src/nn/specializations/detail/ivfpq_search_float_uint64_t.cu
+ src/nn/specializations/fused_l2_knn_long_float_true.cu
+ src/nn/specializations/fused_l2_knn_long_float_false.cu
+ src/nn/specializations/fused_l2_knn_int_float_true.cu
+ src/nn/specializations/fused_l2_knn_int_float_false.cu
+ src/nn/specializations/knn.cu
+ )
set_target_properties(
raft_nn_lib
- PROPERTIES OUTPUT_NAME raft_nn
- BUILD_RPATH "\$ORIGIN"
- INSTALL_RPATH "\$ORIGIN"
- CXX_STANDARD 17
- CXX_STANDARD_REQUIRED ON
- CUDA_STANDARD 17
- CUDA_STANDARD_REQUIRED ON
- POSITION_INDEPENDENT_CODE ON
- INTERFACE_POSITION_INDEPENDENT_CODE ON)
-
- target_link_libraries(raft_nn_lib
- PUBLIC faiss::faiss
- raft::raft)
- target_compile_options(raft_nn_lib
- PRIVATE "$<$:${RAFT_CXX_FLAGS}>"
- "$<$:${RAFT_CUDA_FLAGS}>"
- )
+ PROPERTIES OUTPUT_NAME raft_nn
+ BUILD_RPATH "\$ORIGIN"
+ INSTALL_RPATH "\$ORIGIN"
+ CXX_STANDARD 17
+ CXX_STANDARD_REQUIRED ON
+ CUDA_STANDARD 17
+ CUDA_STANDARD_REQUIRED ON
+ POSITION_INDEPENDENT_CODE ON
+ INTERFACE_POSITION_INDEPENDENT_CODE ON
+ )
+
+ target_link_libraries(
+ raft_nn_lib
+ PUBLIC faiss::faiss raft::raft
+ PRIVATE nvidia::cutlass::cutlass
+ )
+ target_compile_options(
+ raft_nn_lib PRIVATE "$<$:${RAFT_CXX_FLAGS}>"
+ "$<$:${RAFT_CUDA_FLAGS}>"
+ )
# ensure CUDA symbols aren't relocated to the middle of the debug build binaries
target_link_options(raft_nn_lib PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld")
- target_compile_definitions(raft_nn_lib
- INTERFACE "RAFT_NN_COMPILED")
+ target_compile_definitions(raft_nn_lib INTERFACE "RAFT_NN_COMPILED")
endif()
if(TARGET raft_nn_lib AND (NOT TARGET raft::raft_nn_lib))
add_library(raft::raft_nn_lib ALIAS raft_nn_lib)
endif()
-target_link_libraries(raft_nn INTERFACE
- raft::raft
- $)
+target_link_libraries(
+ raft_nn INTERFACE raft::raft $ nvidia::cutlass::cutlass
+)
+
+# ##################################################################################################
+# * raft_distributed -------------------------------------------------------------------------------
+add_library(raft_distributed INTERFACE)
+
+if(TARGET raft_distributed AND (NOT TARGET raft::distributed))
+ add_library(raft::distributed ALIAS raft_distributed)
+endif()
+
+set_target_properties(raft_distributed PROPERTIES EXPORT_NAME distributed)
+
+rapids_export_package(BUILD ucx raft-distributed-exports)
+rapids_export_package(INSTALL ucx raft-distributed-exports)
-##############################################################################
-# - install targets-----------------------------------------------------------
-rapids_cmake_install_lib_dir( lib_dir )
+target_link_libraries(raft_distributed INTERFACE ucx::ucp)
+
+# ##################################################################################################
+# * install targets-----------------------------------------------------------
+rapids_cmake_install_lib_dir(lib_dir)
include(GNUInstallDirs)
include(CPack)
-install(TARGETS raft
- DESTINATION ${lib_dir}
- COMPONENT raft
- EXPORT raft-exports)
+install(
+ TARGETS raft
+ DESTINATION ${lib_dir}
+ COMPONENT raft
+ EXPORT raft-exports
+)
-install(TARGETS raft_distance
- DESTINATION ${lib_dir}
- COMPONENT raft
- EXPORT raft-distance-exports)
+install(
+ TARGETS raft_distance
+ DESTINATION ${lib_dir}
+ COMPONENT raft
+ EXPORT raft-distance-exports
+)
-install(TARGETS raft_nn
- DESTINATION ${lib_dir}
- COMPONENT raft
- EXPORT raft-nn-exports)
+install(
+ TARGETS raft_nn
+ DESTINATION ${lib_dir}
+ COMPONENT raft
+ EXPORT raft-nn-exports
+)
if(TARGET raft_distance_lib)
- install(TARGETS raft_distance_lib
- DESTINATION ${lib_dir}
- COMPONENT distance
- EXPORT raft-distance-lib-exports)
- install(DIRECTORY include/raft_distance
- DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
- COMPONENT distance)
+ install(
+ TARGETS raft_distance_lib
+ DESTINATION ${lib_dir}
+ COMPONENT distance
+ EXPORT raft-distance-lib-exports
+ )
+ install(
+ DIRECTORY include/raft_distance
+ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
+ COMPONENT distance
+ )
endif()
if(TARGET raft_nn_lib)
- install(TARGETS raft_nn_lib
- DESTINATION ${lib_dir}
- COMPONENT nn
- EXPORT raft-nn-lib-exports)
+ install(
+ TARGETS raft_nn_lib
+ DESTINATION ${lib_dir}
+ COMPONENT nn
+ EXPORT raft-nn-lib-exports
+ )
endif()
+install(
+ TARGETS raft_distributed
+ DESTINATION ${lib_dir}
+ COMPONENT distributed
+ EXPORT raft-distributed-exports
+)
-install(DIRECTORY include/raft
- COMPONENT raft
- DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
+install(
+ DIRECTORY include/raft
+ COMPONENT raft
+ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
+)
# Temporary install of raft.hpp while the file is removed
-install(FILES include/raft.hpp
- COMPONENT raft
- DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/raft)
+install(
+ FILES include/raft.hpp
+ COMPONENT raft
+ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/raft
+)
-install(FILES ${CMAKE_CURRENT_BINARY_DIR}/include/raft/version_config.hpp
- COMPONENT raft
- DESTINATION include/raft)
+install(
+ FILES ${CMAKE_CURRENT_BINARY_DIR}/include/raft/version_config.hpp
+ COMPONENT raft
+ DESTINATION include/raft
+)
-##############################################################################
-# - export/install optional components --------------------------------------
+# ##################################################################################################
+# * export/install optional components --------------------------------------
include("${rapids-cmake-dir}/export/write_dependencies.cmake")
-set(raft_components distance nn)
-set(raft_install_comp raft raft)
+set(raft_components distance nn distributed)
+set(raft_install_comp raft raft raft)
if(TARGET raft_distance_lib)
list(APPEND raft_components distance-lib)
list(APPEND raft_install_comp distance)
@@ -479,30 +575,31 @@ endif()
foreach(comp install_comp IN ZIP_LISTS raft_components raft_install_comp)
install(
- EXPORT raft-${comp}-exports
- FILE raft-${comp}-targets.cmake
- NAMESPACE raft::
- DESTINATION "${lib_dir}/cmake/raft"
- COMPONENT ${install_comp}
+ EXPORT raft-${comp}-exports
+ FILE raft-${comp}-targets.cmake
+ NAMESPACE raft::
+ DESTINATION "${lib_dir}/cmake/raft"
+ COMPONENT ${install_comp}
)
export(
- EXPORT raft-${comp}-exports
- FILE ${RAFT_BINARY_DIR}/raft-${comp}-targets.cmake
- NAMESPACE raft::
+ EXPORT raft-${comp}-exports
+ FILE ${RAFT_BINARY_DIR}/raft-${comp}-targets.cmake
+ NAMESPACE raft::
)
rapids_export_write_dependencies(
- BUILD raft-${comp}-exports "${PROJECT_BINARY_DIR}/raft-${comp}-dependencies.cmake"
+ BUILD raft-${comp}-exports "${PROJECT_BINARY_DIR}/raft-${comp}-dependencies.cmake"
)
rapids_export_write_dependencies(
- INSTALL raft-${comp}-exports "${PROJECT_BINARY_DIR}/rapids-cmake/raft/export/${install_comp}/raft-${comp}-dependencies.cmake"
+ INSTALL raft-${comp}-exports
+ "${PROJECT_BINARY_DIR}/rapids-cmake/raft/export/${install_comp}/raft-${comp}-dependencies.cmake"
)
endforeach()
-##############################################################################
-# - install export -----------------------------------------------------------
+# ##################################################################################################
+# * install export -----------------------------------------------------------
set(doc_string
- [=[
+ [=[
Provide targets for the RAFT: Reusable Accelerated Functions and Tools
RAFT contains fundamental widely-used algorithms and primitives
@@ -511,27 +608,35 @@ for data science and machine learning.
Optional Components:
- nn
- distance
+ - distributed
Imported Targets:
- raft::raft
- raft::nn brought in by the `nn` optional component
- raft::distance brought in by the `distance` optional component
+ - raft::distributed brought in by the `distributed` optional component
-]=])
+]=]
+)
set(code_string ${nvtx_export_string})
if(RAFT_ENABLE_thrust_DEPENDENCY)
- string(APPEND code_string
- [=[
+ string(
+ APPEND
+ code_string
+ [=[
if(NOT TARGET raft::Thrust)
thrust_create_target(raft::Thrust FROM_OPTIONS)
endif()
- ]=])
+ ]=]
+ )
endif()
-string(APPEND code_string
-[=[
+string(
+ APPEND
+ code_string
+ [=[
if(distance IN_LIST raft_FIND_COMPONENTS)
enable_language(CUDA)
endif()
@@ -545,45 +650,58 @@ if(nn IN_LIST raft_FIND_COMPONENTS)
add_library(faiss ALIAS faiss::faiss)
endif()
endif()
-]=])
+]=]
+)
# Use `rapids_export` for 22.04 as it will have COMPONENT support
include(cmake/modules/raft_export.cmake)
-raft_export(INSTALL raft
- COMPONENTS nn distance
- EXPORT_SET raft-exports
- GLOBAL_TARGETS raft nn distance
- NAMESPACE raft::
- DOCUMENTATION doc_string
- FINAL_CODE_BLOCK code_string)
-
-##############################################################################
-# - build export -------------------------------------------------------------
-raft_export(BUILD raft
- EXPORT_SET raft-exports
- COMPONENTS nn distance
- GLOBAL_TARGETS raft raft_distance raft_nn
- DOCUMENTATION doc_string
- NAMESPACE raft::
- FINAL_CODE_BLOCK code_string)
-
-##############################################################################
-# - build test executable ----------------------------------------------------
+raft_export(
+ INSTALL raft COMPONENTS nn distance distributed EXPORT_SET raft-exports GLOBAL_TARGETS raft nn
+ distance distributed NAMESPACE raft:: DOCUMENTATION doc_string FINAL_CODE_BLOCK code_string
+)
+
+# ##################################################################################################
+# * build export -------------------------------------------------------------
+raft_export(
+ BUILD
+ raft
+ EXPORT_SET
+ raft-exports
+ COMPONENTS
+ nn
+ distance
+ distributed
+ GLOBAL_TARGETS
+ raft
+ raft_distance
+ distributed
+ raft_nn
+ DOCUMENTATION
+ doc_string
+ NAMESPACE
+ raft::
+ FINAL_CODE_BLOCK
+ code_string
+)
+
+# ##################################################################################################
+# * build test executable ----------------------------------------------------
if(BUILD_TESTS)
include(test/CMakeLists.txt)
endif()
-##############################################################################
-# - build benchmark executable -----------------------------------------------
+# ##################################################################################################
+# * build benchmark executable -----------------------------------------------
if(BUILD_BENCH)
include(bench/CMakeLists.txt)
endif()
-##############################################################################
-# - doxygen targets ----------------------------------------------------------
+# ##################################################################################################
+# * doxygen targets ----------------------------------------------------------
include(cmake/doxygen.cmake)
-add_doxygen_target(IN_DOXYFILE doxygen/Doxyfile.in
- OUT_DOXYFILE ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
- CWD ${CMAKE_CURRENT_BINARY_DIR})
+add_doxygen_target(
+ IN_DOXYFILE doxygen/Doxyfile.in OUT_DOXYFILE ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile CWD
+ ${CMAKE_CURRENT_BINARY_DIR}
+)
diff --git a/cpp/bench/CMakeLists.txt b/cpp/bench/CMakeLists.txt
index 81e894fbbc..4e6b6ceb40 100644
--- a/cpp/bench/CMakeLists.txt
+++ b/cpp/bench/CMakeLists.txt
@@ -1,141 +1,134 @@
-#=============================================================================
+# =============================================================================
# Copyright (c) 2022, NVIDIA CORPORATION.
#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#=============================================================================
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+# =============================================================================
-###################################################################################################
-# - compiler function -----------------------------------------------------------------------------
+# ##################################################################################################
+# * compiler function -----------------------------------------------------------------------------
function(ConfigureBench)
-set(options OPTIONAL DIST NN)
-set(oneValueArgs NAME )
-set(multiValueArgs PATH TARGETS CONFIGURATIONS)
-
-cmake_parse_arguments(ConfigureBench "${options}" "${oneValueArgs}"
- "${multiValueArgs}" ${ARGN} )
-
-set(BENCH_NAME ${ConfigureBench_NAME})
-
-add_executable(${BENCH_NAME} ${ConfigureBench_PATH})
-
-target_link_libraries(${BENCH_NAME}
- PRIVATE
- raft::raft
- $<$:raft::distance>
- $<$:raft::nn>
- benchmark::benchmark
- Threads::Threads
- $
- $
- )
-
-set_target_properties(${BENCH_NAME}
- PROPERTIES
- # set target compile options
- INSTALL_RPATH "\$ORIGIN/../../../lib"
- CXX_STANDARD 17
- CXX_STANDARD_REQUIRED ON
- CUDA_STANDARD 17
- CUDA_STANDARD_REQUIRED ON
- POSITION_INDEPENDENT_CODE ON
- INTERFACE_POSITION_INDEPENDENT_CODE ON
- )
-
-target_compile_options(${BENCH_NAME}
- PRIVATE "$<$:${RAFT_CXX_FLAGS}>"
- "$<$:${RAFT_CUDA_FLAGS}>"
- )
-
-target_include_directories(${BENCH_NAME}
- PUBLIC "$"
- )
-
-install(
- TARGETS ${BENCH_NAME}
- COMPONENT testing
- DESTINATION bin/gbench/libraft
- EXCLUDE_FROM_ALL)
+ set(options OPTIONAL DIST NN)
+ set(oneValueArgs NAME)
+ set(multiValueArgs PATH TARGETS CONFIGURATIONS)
+
+ cmake_parse_arguments(ConfigureBench "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+
+ set(BENCH_NAME ${ConfigureBench_NAME})
+
+ add_executable(${BENCH_NAME} ${ConfigureBench_PATH})
+
+ target_link_libraries(
+ ${BENCH_NAME}
+ PRIVATE raft::raft
+ $<$:raft::distance>
+ $<$:raft::nn>
+ benchmark::benchmark
+ Threads::Threads
+ $
+ $
+ )
+
+ set_target_properties(
+ ${BENCH_NAME}
+ PROPERTIES # set target compile options
+ INSTALL_RPATH "\$ORIGIN/../../../lib"
+ CXX_STANDARD 17
+ CXX_STANDARD_REQUIRED ON
+ CUDA_STANDARD 17
+ CUDA_STANDARD_REQUIRED ON
+ POSITION_INDEPENDENT_CODE ON
+ INTERFACE_POSITION_INDEPENDENT_CODE ON
+ )
+
+ target_compile_options(
+ ${BENCH_NAME} PRIVATE "$<$:${RAFT_CXX_FLAGS}>"
+ "$<$:${RAFT_CUDA_FLAGS}>"
+ )
+
+ target_include_directories(${BENCH_NAME} PUBLIC "$")
+
+ install(
+ TARGETS ${BENCH_NAME}
+ COMPONENT testing
+ DESTINATION bin/gbench/libraft
+ EXCLUDE_FROM_ALL
+ )
endfunction()
if(BUILD_BENCH)
- ConfigureBench(NAME CLUSTER_BENCH
- PATH
- bench/cluster/kmeans_balanced.cu
- bench/cluster/kmeans.cu
- bench/main.cpp
- OPTIONAL DIST NN
- )
-
- ConfigureBench(NAME DISTANCE_BENCH
- PATH
- bench/distance/distance_cosine.cu
- bench/distance/distance_exp_l2.cu
- bench/distance/distance_l1.cu
- bench/distance/distance_unexp_l2.cu
- bench/distance/fused_l2_nn.cu
- bench/distance/kernels.cu
- bench/main.cpp
- OPTIONAL DIST
- )
-
- ConfigureBench(NAME LINALG_BENCH
- PATH
- bench/linalg/add.cu
- bench/linalg/map_then_reduce.cu
- bench/linalg/matrix_vector_op.cu
- bench/linalg/reduce_rows_by_key.cu
- bench/linalg/reduce.cu
- bench/main.cpp
- )
-
- ConfigureBench(NAME MATRIX_BENCH
- PATH
- bench/matrix/argmin.cu
- bench/main.cpp
- )
-
- ConfigureBench(NAME RANDOM_BENCH
- PATH
- bench/random/make_blobs.cu
- bench/random/permute.cu
- bench/random/rng.cu
- bench/main.cpp
- )
-
- ConfigureBench(NAME SPARSE_BENCH
- PATH
- bench/sparse/convert_csr.cu
- bench/main.cpp
- )
-
- ConfigureBench(NAME NEIGHBORS_BENCH
- PATH
- bench/neighbors/knn/brute_force_float_int64_t.cu
- bench/neighbors/knn/brute_force_float_uint32_t.cu
- bench/neighbors/knn/ivf_flat_float_int64_t.cu
- bench/neighbors/knn/ivf_flat_float_uint32_t.cu
- bench/neighbors/knn/ivf_flat_int8_t_int64_t.cu
- bench/neighbors/knn/ivf_flat_uint8_t_uint32_t.cu
- bench/neighbors/knn/ivf_pq_float_int64_t.cu
- bench/neighbors/knn/ivf_pq_float_uint32_t.cu
- bench/neighbors/knn/ivf_pq_int8_t_int64_t.cu
- bench/neighbors/knn/ivf_pq_uint8_t_uint32_t.cu
- bench/neighbors/selection.cu
- bench/main.cpp
- OPTIONAL DIST NN
- )
+ ConfigureBench(
+ NAME CLUSTER_BENCH PATH bench/cluster/kmeans_balanced.cu bench/cluster/kmeans.cu bench/main.cpp
+ OPTIONAL DIST NN
+ )
+
+ ConfigureBench(
+ NAME
+ DISTANCE_BENCH
+ PATH
+ bench/distance/distance_cosine.cu
+ bench/distance/distance_exp_l2.cu
+ bench/distance/distance_l1.cu
+ bench/distance/distance_unexp_l2.cu
+ bench/distance/fused_l2_nn.cu
+ bench/distance/kernels.cu
+ bench/main.cpp
+ OPTIONAL
+ DIST
+ )
+
+ ConfigureBench(
+ NAME
+ LINALG_BENCH
+ PATH
+ bench/linalg/add.cu
+ bench/linalg/map_then_reduce.cu
+ bench/linalg/matrix_vector_op.cu
+ bench/linalg/norm.cu
+ bench/linalg/normalize.cu
+ bench/linalg/reduce_rows_by_key.cu
+ bench/linalg/reduce.cu
+ bench/main.cpp
+ )
+
+ ConfigureBench(NAME MATRIX_BENCH PATH bench/matrix/argmin.cu bench/main.cpp)
+
+ ConfigureBench(
+ NAME RANDOM_BENCH PATH bench/random/make_blobs.cu bench/random/permute.cu bench/random/rng.cu
+ bench/main.cpp
+ )
+
+ ConfigureBench(NAME SPARSE_BENCH PATH bench/sparse/convert_csr.cu bench/main.cpp)
+
+ ConfigureBench(
+ NAME
+ NEIGHBORS_BENCH
+ PATH
+ bench/neighbors/knn/brute_force_float_int64_t.cu
+ bench/neighbors/knn/brute_force_float_uint32_t.cu
+ bench/neighbors/knn/ivf_flat_float_int64_t.cu
+ bench/neighbors/knn/ivf_flat_float_uint32_t.cu
+ bench/neighbors/knn/ivf_flat_int8_t_int64_t.cu
+ bench/neighbors/knn/ivf_flat_uint8_t_uint32_t.cu
+ bench/neighbors/knn/ivf_pq_float_int64_t.cu
+ bench/neighbors/knn/ivf_pq_float_uint32_t.cu
+ bench/neighbors/knn/ivf_pq_int8_t_int64_t.cu
+ bench/neighbors/knn/ivf_pq_uint8_t_uint32_t.cu
+ bench/neighbors/refine.cu
+ bench/neighbors/selection.cu
+ bench/main.cpp
+ OPTIONAL
+ DIST
+ NN
+ )
endif()
-
diff --git a/cpp/bench/linalg/norm.cu b/cpp/bench/linalg/norm.cu
new file mode 100644
index 0000000000..cce4195cf1
--- /dev/null
+++ b/cpp/bench/linalg/norm.cu
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+namespace raft::bench::linalg {
+
+template
+struct norm_input {  // problem size of one benchmark case
+ IdxT rows, cols;  // row and column counts of the input matrix
+};
+
+template
+inline auto operator<<(std::ostream& os, const norm_input& p) -> std::ostream&
+{
+  // Writes the case as "<rows>#<cols>" and hands the stream back for chaining.
+  return os << p.rows << "#" << p.cols;
+}
+
+template
+struct rowNorm : public fixture {  // benchmark fixture: times raft::linalg::norm on a rows x cols input
+ rowNorm(const norm_input& p) : params(p), in(p.rows * p.cols, stream), dots(p.rows, stream)
+ {
+ raft::random::RngState rng{1234};  // constant RngState argument (presumably the seed)
+ raft::random::uniform(rng, in.data(), p.rows * p.cols, (T)-10.0, (T)10.0, stream);  // fills `in`
+ }
+
+ void run_benchmark(::benchmark::State& state) override
+ {
+ std::ostringstream label_stream;
+ label_stream << params;  // label text is "<rows>#<cols>"
+ state.SetLabel(label_stream.str());
+
+ loop_on_state(state, [this]() {  // presumably inherited from `fixture`, like `stream` and `handle`
+ auto input_view = raft::make_device_matrix_view(
+ in.data(), params.rows, params.cols);
+ auto output_view =
+ raft::make_device_vector_view(dots.data(), params.rows);  // one output value per row
+ raft::linalg::norm(handle,
+ input_view,
+ output_view,
+ raft::linalg::L2Norm,
+ raft::linalg::Apply::ALONG_ROWS,
+ raft::SqrtOp());
+ });
+ }
+
+ private:
+ norm_input params;  // the case being benchmarked
+ rmm::device_uvector in, dots;  // input buffer (rows * cols elements) and output buffer (rows elements)
+}; // struct rowNorm
+
+const std::vector> norm_inputs_i32 =  // NOTE(review): name suggests 32-bit indices; element type not visible here
+ raft::util::itertools::product>({10, 100, 1000, 10000, 100000},
+ {16, 32, 64, 128, 256, 512, 1024});  // first list: rows, second: cols; presumably every pairing, confirm
+const std::vector> norm_inputs_i64 =  // same two lists as above
+ raft::util::itertools::product>({10, 100, 1000, 10000, 100000},
+ {16, 32, 64, 128, 256, 512, 1024});
+
+RAFT_BENCH_REGISTER((rowNorm), "", norm_inputs_i32);  // two registrations per input list
+RAFT_BENCH_REGISTER((rowNorm), "", norm_inputs_i32);
+RAFT_BENCH_REGISTER((rowNorm), "", norm_inputs_i64);
+RAFT_BENCH_REGISTER((rowNorm), "", norm_inputs_i64);
+
+} // namespace raft::bench::linalg
diff --git a/cpp/bench/linalg/normalize.cu b/cpp/bench/linalg/normalize.cu
new file mode 100644
index 0000000000..d01473ffeb
--- /dev/null
+++ b/cpp/bench/linalg/normalize.cu
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+#include
+#include
+
+#include
+
+namespace raft::bench::linalg {
+
+template
+struct normalize_input {  // problem size of one benchmark case
+ IdxT rows, cols;  // row and column counts of the input and output matrices
+};
+
+template
+inline auto operator<<(std::ostream& os, const normalize_input& p) -> std::ostream&
+{
+  // Writes the case as "<rows>#<cols>" and hands the stream back for chaining.
+  return os << p.rows << "#" << p.cols;
+}
+
+template
+struct rowNormalize : public fixture {  // benchmark fixture: times raft::linalg::row_normalize
+ rowNormalize(const normalize_input& p)
+ : params(p), in(p.rows * p.cols, stream), out(p.rows * p.cols, stream)
+ {
+ raft::random::RngState rng{1234};  // constant RngState argument (presumably the seed)
+ raft::random::uniform(rng, in.data(), p.rows * p.cols, (T)-10.0, (T)10.0, stream);  // fills `in`
+ }
+
+ void run_benchmark(::benchmark::State& state) override
+ {
+ std::ostringstream label_stream;
+ label_stream << params;  // label text is "<rows>#<cols>"
+ state.SetLabel(label_stream.str());
+
+ loop_on_state(state, [this]() {  // presumably inherited from `fixture`, like `stream` and `handle`
+ auto input_view = raft::make_device_matrix_view(
+ in.data(), params.rows, params.cols);
+ auto output_view = raft::make_device_matrix_view(
+ out.data(), params.rows, params.cols);  // output has the same rows x cols extent as the input
+ raft::linalg::row_normalize(handle, input_view, output_view, raft::linalg::L2Norm);
+ });
+ }
+
+ private:
+ normalize_input params;  // the case being benchmarked
+ rmm::device_uvector in, out;  // input and output buffers, rows * cols elements each
+}; // struct rowNormalize
+
+const std::vector> normalize_inputs_i32 =  // NOTE(review): name suggests 32-bit indices; element type not visible here
+ raft::util::itertools::product>(
+ {10, 100, 1000, 10000, 100000}, {8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384});  // rows, cols
+const std::vector> normalize_inputs_i64 =  // same two lists as above
+ raft::util::itertools::product>(
+ {10, 100, 1000, 10000, 100000}, {8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384});
+
+RAFT_BENCH_REGISTER((rowNormalize), "", normalize_inputs_i32);  // largest listed pairing: 100000 x 16384 elements
+RAFT_BENCH_REGISTER((rowNormalize), "", normalize_inputs_i32);
+RAFT_BENCH_REGISTER((rowNormalize), "", normalize_inputs_i64);
+RAFT_BENCH_REGISTER((rowNormalize), "", normalize_inputs_i64);
+
+} // namespace raft::bench::linalg
diff --git a/cpp/bench/neighbors/refine.cu b/cpp/bench/neighbors/refine.cu
new file mode 100644
index 0000000000..a038905ace
--- /dev/null
+++ b/cpp/bench/neighbors/refine.cu
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#if defined RAFT_DISTANCE_COMPILED
+#include
+#endif
+
+#if defined RAFT_NN_COMPILED
+#include
+#endif
+
+#include
+#include
+#include
+
+#include "../../test/neighbors/refine_helper.cuh"
+
+#include
+#include
+
+using namespace raft::neighbors::detail;
+
+namespace raft::bench::neighbors {
+
+template
+inline auto operator<<(std::ostream& os, const RefineInputs& p) -> std::ostream&
+{
+  // Writes "<n_rows>#<dim>#<n_queries>#<k0>#<k>#" followed by "host" or "device".
+  os << p.n_rows << "#" << p.dim << "#" << p.n_queries << "#" << p.k0 << "#" << p.k << "#";
+  return os << (p.host_data ? "host" : "device");
+}
+
+// Benchmark fixture for raft::neighbors::refine. The `host_data` flag of the inputs selects whether
+// the `*_host` buffers or the plain (device-side, per the label text) buffers of `data` are used.
+template
+class RefineAnn : public fixture {
+ public:
+  RefineAnn(RefineInputs p) : data(handle_, p) {}
+
+  void run_benchmark(::benchmark::State& state) override
+  {
+    std::ostringstream label_stream;
+    label_stream << data.p;
+    state.SetLabel(label_stream.str());
+
+    auto old_mr = rmm::mr::get_current_device_resource();  // remembered so it can be restored below
+    rmm::mr::pool_memory_resource pool_mr(old_mr);         // pool layered on the previous resource
+    rmm::mr::set_current_device_resource(&pool_mr);        // current resource for the timed loop
+
+    if (data.p.host_data) {
+      loop_on_state(state, [this]() {
+        raft::neighbors::refine(handle_,
+                                data.dataset_host.view(),
+                                data.queries_host.view(),
+                                data.candidates_host.view(),
+                                data.refined_indices_host.view(),
+                                data.refined_distances_host.view(),
+                                data.p.metric);
+      });
+    } else {
+      loop_on_state(state, [this]() {  // only members are used, so capture `this` like the branch above
+        raft::neighbors::refine(handle_,
+                                data.dataset.view(),
+                                data.queries.view(),
+                                data.candidates.view(),
+                                data.refined_indices.view(),
+                                data.refined_distances.view(),
+                                data.p.metric);
+      });
+    }
+    rmm::mr::set_current_device_resource(old_mr);  // restore before pool_mr goes out of scope
+  }
+
+ private:
+  raft::handle_t handle_;  // declared before `data`, which is constructed from it
+  RefineHelper data;       // owns the inputs (`p`) and every buffer passed to refine()
+};
+
+std::vector> getInputs()  // builds the list of cases passed to RAFT_BENCH_REGISTER below
+{
+ std::vector> out;
+ raft::distance::DistanceType metric = raft::distance::DistanceType::L2Expanded;  // same metric for every case
+ for (bool host_data : {true, false}) {
+ for (int64_t n_queries : {1000, 10000}) {
+ for (int64_t dim : {128, 512}) {
+ // RefineInputs is declared outside this file (presumably refine_helper.cuh); verify the field order there.
+ out.push_back(RefineInputs{n_queries, 2000000, dim, 32, 128, metric, host_data});
+ out.push_back(RefineInputs{n_queries, 2000000, dim, 10, 40, metric, host_data});
+ }
+ }
+ }
+ return out;  // 2 * 2 * 2 iterations with two entries each: 16 cases
+}
+
+using refine_float_int64 = RefineAnn;  // template arguments not visible here; per the alias: float data, int64 indices — confirm
+RAFT_BENCH_REGISTER(refine_float_int64, "", getInputs());
+
+using refine_uint8_int64 = RefineAnn;  // per the alias: uint8 data, int64 indices — confirm
+RAFT_BENCH_REGISTER(refine_uint8_int64, "", getInputs());
+} // namespace raft::bench::neighbors
diff --git a/cpp/cmake/config.json b/cpp/cmake/config.json
new file mode 100644
index 0000000000..f7cc50e513
--- /dev/null
+++ b/cpp/cmake/config.json
@@ -0,0 +1,43 @@
+{
+ "parse": {
+ "additional_commands": {
+ "CPMFindPackage": {
+ "kwargs": {
+ "NAME": 1,
+ "GITHUB_REPOSITORY": "?",
+ "GIT_TAG": "?",
+ "VERSION": "?",
+ "GIT_SHALLOW": "?",
+ "OPTIONS": "*",
+ "FIND_PACKAGE_ARGUMENTS": "*"
+ }
+ },
+ "ConfigureTest": {
+ "flags": ["TEST_NAME", "TEST_SRC"]
+ },
+ "ConfigureBench": {
+ "flags": ["BENCH_NAME", "BENCH_SRC"]
+ }
+ }
+ },
+ "format": {
+ "line_width": 100,
+ "tab_size": 2,
+ "command_case": "unchanged",
+ "max_lines_hwrap": 1,
+ "max_pargs_hwrap": 999,
+ "dangle_parens": true
+ },
+ "lint": {
+ "disabled_codes": ["C0301", "C0111", "C0113"],
+ "function_pattern": "[0-9A-z_]+",
+ "macro_pattern": "[0-9A-z_]+",
+ "global_var_pattern": "[A-z][0-9A-z_]+",
+ "internal_var_pattern": "_[A-z][0-9A-z_]+",
+ "local_var_pattern": "[A-z][A-z0-9_]+",
+ "private_var_pattern": "_[0-9A-z_]+",
+ "public_var_pattern": "[A-z][0-9A-z_]+",
+ "argument_var_pattern": "[A-z][A-z0-9_]+",
+ "keyword_pattern": "[A-z][0-9A-z_]+"
+ }
+}
diff --git a/cpp/cmake/doxygen.cmake b/cpp/cmake/doxygen.cmake
index 7d06ec194c..a6fddbacd6 100644
--- a/cpp/cmake/doxygen.cmake
+++ b/cpp/cmake/doxygen.cmake
@@ -1,16 +1,14 @@
# Copyright (c) 2020-2022, NVIDIA CORPORATION.
#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
#
find_package(Doxygen 1.8.11)
@@ -24,11 +22,13 @@ function(add_doxygen_target)
configure_file(${dox_IN_DOXYFILE} ${dox_OUT_DOXYFILE} @ONLY)
message("Command: ${DOXYGEN_EXECUTABLE} ${dox_OUT_DOXYFILE}")
- add_custom_target(docs_raft
+ add_custom_target(
+ docs_raft
${DOXYGEN_EXECUTABLE} ${dox_OUT_DOXYFILE}
WORKING_DIRECTORY ${dox_CWD}
VERBATIM
- COMMENT "Generate doxygen docs")
+ COMMENT "Generate doxygen docs"
+ )
else()
message("add_doxygen_target: doxygen exe not found")
endif()
diff --git a/cpp/cmake/modules/ConfigureCUDA.cmake b/cpp/cmake/modules/ConfigureCUDA.cmake
index 440c8c4f3a..5e68ca5bc4 100644
--- a/cpp/cmake/modules/ConfigureCUDA.cmake
+++ b/cpp/cmake/modules/ConfigureCUDA.cmake
@@ -1,26 +1,24 @@
-#=============================================================================
+# =============================================================================
# Copyright (c) 2018-2022, NVIDIA CORPORATION.
#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#=============================================================================
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+# =============================================================================
if(DISABLE_DEPRECATION_WARNINGS)
- list(APPEND RAFT_CXX_FLAGS -Wno-deprecated-declarations)
- list(APPEND RAFT_CUDA_FLAGS -Xcompiler=-Wno-deprecated-declarations)
+ list(APPEND RAFT_CXX_FLAGS -Wno-deprecated-declarations)
+ list(APPEND RAFT_CUDA_FLAGS -Xcompiler=-Wno-deprecated-declarations)
endif()
if(CMAKE_COMPILER_IS_GNUCXX)
- list(APPEND RAFT_CXX_FLAGS -Wall -Werror -Wno-unknown-pragmas -Wno-error=deprecated-declarations)
+ list(APPEND RAFT_CXX_FLAGS -Wall -Werror -Wno-unknown-pragmas -Wno-error=deprecated-declarations)
endif()
list(APPEND RAFT_CUDA_FLAGS --expt-extended-lambda --expt-relaxed-constexpr)
@@ -31,21 +29,23 @@ list(APPEND RAFT_CUDA_FLAGS -Xfatbin=-compress-all)
# set warnings as errors
if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.2.0)
- list(APPEND RAFT_CUDA_FLAGS -Werror=all-warnings)
+ list(APPEND RAFT_CUDA_FLAGS -Werror=all-warnings)
endif()
list(APPEND RAFT_CUDA_FLAGS -Xcompiler=-Wall,-Werror,-Wno-error=deprecated-declarations)
-# Option to enable line info in CUDA device compilation to allow introspection when profiling / memchecking
+# Option to enable line info in CUDA device compilation to allow introspection when profiling /
+# memchecking
if(CUDA_ENABLE_LINEINFO)
- list(APPEND RAFT_CUDA_FLAGS -lineinfo)
+ list(APPEND RAFT_CUDA_FLAGS -lineinfo)
endif()
if(OpenMP_FOUND)
- list(APPEND RAFT_CUDA_FLAGS -Xcompiler=${OpenMP_CXX_FLAGS})
+ list(APPEND RAFT_CUDA_FLAGS -Xcompiler=${OpenMP_CXX_FLAGS})
endif()
# Debug options
if(CMAKE_BUILD_TYPE MATCHES Debug)
- message(VERBOSE "RAFT: Building with debugging flags")
- list(APPEND RAFT_CUDA_FLAGS -G -Xcompiler=-rdynamic)
+ message(VERBOSE "RAFT: Building with debugging flags")
+ list(APPEND RAFT_CUDA_FLAGS -G -Xcompiler=-rdynamic)
+ list(APPEND RAFT_CUDA_FLAGS -Xptxas --suppress-stack-size-warning)
endif()
diff --git a/cpp/cmake/modules/raft_export.cmake b/cpp/cmake/modules/raft_export.cmake
index 748fa8ad26..bcc3578bf8 100644
--- a/cpp/cmake/modules/raft_export.cmake
+++ b/cpp/cmake/modules/raft_export.cmake
@@ -1,18 +1,16 @@
-#=============================================================================
+# =============================================================================
# Copyright (c) 2021-2022, NVIDIA CORPORATION.
#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#=============================================================================
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+# =============================================================================
include_guard(GLOBAL)
#[=======================================================================[.rst:
@@ -41,14 +39,14 @@ calls to :cmake:command:`find_dependency`, or :cmake:command:`CPMFindPackage`.
.. note::
:cmake:command:`raft_export` always installs to `lib` and doesn't use GNUInstallDirs
- The files generated by :cmake:command:`raft_export` are completly standalone
+ The files generated by :cmake:command:`raft_export` are completely standalone
and don't require the consuming package to use `rapids-cmake`
``project_name``
Name of the project, to be used by consumers when using `find_package`
``GLOBAL_TARGETS``
- Explicitly list what targets should be made globally visibile to
+ Explicitly list what targets should be made globally visible to
the consuming project.
``VERSION``
@@ -61,9 +59,9 @@ calls to :cmake:command:`find_dependency`, or :cmake:command:`CPMFindPackage`.
Depending on the version string different compatibility modes will be used.
+------------------+---------------------+
- | Version String | Compatiblity Type |
+ | Version String | Compatibility Type |
+==================+=====================+
- | None | No checks perfomed |
+ | None | No checks performed |
+------------------+---------------------+
| X | SameMajorVersion |
+------------------+---------------------+
@@ -174,19 +172,26 @@ function(raft_export type project_name)
set(scratch_dir "${PROJECT_BINARY_DIR}/rapids-cmake/${project_name}/export/raft/")
- configure_package_config_file("${CMAKE_CURRENT_FUNCTION_LIST_DIR}/config.cmake.in"
- "${scratch_dir}/${project_name}-config.cmake"
- INSTALL_DESTINATION "${install_location}")
+ configure_package_config_file(
+ "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/config.cmake.in"
+ "${scratch_dir}/${project_name}-config.cmake" INSTALL_DESTINATION "${install_location}"
+ )
if(rapids_version_set)
write_basic_package_version_file(
- "${scratch_dir}/${project_name}-config-version.cmake" VERSION ${rapids_project_version}
- COMPATIBILITY ${rapids_project_version_compat})
+ "${scratch_dir}/${project_name}-config-version.cmake"
+ VERSION ${rapids_project_version}
+ COMPATIBILITY ${rapids_project_version_compat}
+ )
endif()
- install(EXPORT ${RAPIDS_EXPORT_SET} FILE ${project_name}-targets.cmake
- NAMESPACE ${RAPIDS_PROJECT_VERSION} DESTINATION "${install_location}"
- COMPONENT raft)
+ install(
+ EXPORT ${RAPIDS_EXPORT_SET}
+ FILE ${project_name}-targets.cmake
+ NAMESPACE ${RAPIDS_PROJECT_VERSION}
+ DESTINATION "${install_location}"
+ COMPONENT raft
+ )
if(TARGET rapids_export_install_${RAPIDS_EXPORT_SET})
include("${rapids-cmake-dir}/export/write_dependencies.cmake")
@@ -203,41 +208,55 @@ function(raft_export type project_name)
endif()
# Install everything we have generated
- install(DIRECTORY "${scratch_dir}/" DESTINATION "${install_location}"
- COMPONENT raft)
+ install(
+ DIRECTORY "${scratch_dir}/"
+ DESTINATION "${install_location}"
+ COMPONENT raft
+ )
foreach(comp nn distance)
set(scratch_dir "${PROJECT_BINARY_DIR}/rapids-cmake/${project_name}/export/${comp}/")
file(MAKE_DIRECTORY "${scratch_dir}")
- install(DIRECTORY "${scratch_dir}" DESTINATION "${install_location}"
- COMPONENT ${comp})
+ install(
+ DIRECTORY "${scratch_dir}"
+ DESTINATION "${install_location}"
+ COMPONENT ${comp}
+ )
endforeach()
else()
set(install_location "${PROJECT_BINARY_DIR}")
- configure_package_config_file("${CMAKE_CURRENT_FUNCTION_LIST_DIR}/config.cmake.in"
- "${install_location}/${project_name}-config.cmake"
- INSTALL_DESTINATION "${install_location}")
+ configure_package_config_file(
+ "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/config.cmake.in"
+ "${install_location}/${project_name}-config.cmake" INSTALL_DESTINATION "${install_location}"
+ )
if(rapids_version_set)
write_basic_package_version_file(
- "${install_location}/${project_name}-config-version.cmake" VERSION ${rapids_project_version}
- COMPATIBILITY ${rapids_project_version_compat})
+ "${install_location}/${project_name}-config-version.cmake"
+ VERSION ${rapids_project_version}
+ COMPATIBILITY ${rapids_project_version_compat}
+ )
endif()
- export(EXPORT ${RAPIDS_EXPORT_SET} NAMESPACE ${RAPIDS_PROJECT_VERSION}
- FILE "${install_location}/${project_name}-targets.cmake")
+ export(
+ EXPORT ${RAPIDS_EXPORT_SET}
+ NAMESPACE ${RAPIDS_PROJECT_VERSION}
+ FILE "${install_location}/${project_name}-targets.cmake"
+ )
if(TARGET rapids_export_build_${RAPIDS_EXPORT_SET})
include("${rapids-cmake-dir}/export/write_dependencies.cmake")
- rapids_export_write_dependencies(BUILD ${RAPIDS_EXPORT_SET}
- "${install_location}/${project_name}-dependencies.cmake")
+ rapids_export_write_dependencies(
+ BUILD ${RAPIDS_EXPORT_SET} "${install_location}/${project_name}-dependencies.cmake"
+ )
endif()
if(DEFINED RAPIDS_LANGUAGES)
include("${rapids-cmake-dir}/export/write_language.cmake")
foreach(lang IN LISTS RAPIDS_LANGUAGES)
- rapids_export_write_language(BUILD ${lang}
- "${install_location}/${project_name}-${lang}-language.cmake")
+ rapids_export_write_language(
+ BUILD ${lang} "${install_location}/${project_name}-${lang}-language.cmake"
+ )
endforeach()
endif()
diff --git a/cpp/cmake/thirdparty/get_cutlass.cmake b/cpp/cmake/thirdparty/get_cutlass.cmake
new file mode 100644
index 0000000000..811a5466c3
--- /dev/null
+++ b/cpp/cmake/thirdparty/get_cutlass.cmake
@@ -0,0 +1,99 @@
+# =============================================================================
+# Copyright (c) 2021-2022, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+# =============================================================================
+
+function(find_and_configure_cutlass)
+  # Usage: find_and_configure_cutlass(VERSION <ver> REPOSITORY <git-url> PINNED_TAG <git-tag>)
+  # Only one-value keywords exist, so the option and multi-value lists are passed as empty strings.
+  set(oneValueArgs VERSION REPOSITORY PINNED_TAG)
+  cmake_parse_arguments(PKG "" "${oneValueArgs}" "" ${ARGN})
+
+  set(CUTLASS_ENABLE_HEADERS_ONLY
+      ON
+      CACHE BOOL "Enable only the header library"
+  )
+  set(CUTLASS_NAMESPACE
+      "raft_cutlass"
+      CACHE STRING "Top level namespace of CUTLASS"
+  )
+  set(CUTLASS_ENABLE_CUBLAS
+      OFF
+      CACHE BOOL "Disable CUTLASS to build with cuBLAS library."
+  )
+
+  rapids_cpm_find(
+    NvidiaCutlass ${PKG_VERSION}
+    GLOBAL_TARGETS nvidia::cutlass::cutlass
+    CPM_ARGS
+    GIT_REPOSITORY ${PKG_REPOSITORY}
+    GIT_TAG ${PKG_PINNED_TAG}
+    GIT_SHALLOW TRUE
+    OPTIONS "CUDAToolkit_ROOT ${CUDAToolkit_LIBRARY_DIR}"
+  )
+
+  # Provide the namespaced alias when only the plain CUTLASS target exists.
+  if(TARGET CUTLASS AND NOT TARGET nvidia::cutlass::cutlass)
+    add_library(nvidia::cutlass::cutlass ALIAS CUTLASS)
+  endif()
+
+  if(NvidiaCutlass_ADDED)
+    rapids_export(
+      BUILD NvidiaCutlass
+      EXPORT_SET NvidiaCutlass
+      GLOBAL_TARGETS nvidia::cutlass::cutlass
+      NAMESPACE nvidia::cutlass::
+    )
+  endif()
+
+  # We generate the cutlass-config files when we build cutlass locally, so always do
+  # `find_dependency`
+  rapids_export_package(
+    BUILD NvidiaCutlass raft-distance-exports GLOBAL_TARGETS nvidia::cutlass::cutlass
+  )
+  rapids_export_package(
+    INSTALL NvidiaCutlass raft-distance-exports GLOBAL_TARGETS nvidia::cutlass::cutlass
+  )
+  rapids_export_package(
+    BUILD NvidiaCutlass raft-nn-exports GLOBAL_TARGETS nvidia::cutlass::cutlass
+  )
+  rapids_export_package(
+    INSTALL NvidiaCutlass raft-nn-exports GLOBAL_TARGETS nvidia::cutlass::cutlass
+  )
+
+  # Tell cmake where it can find the generated NvidiaCutlass-config.cmake we wrote.
+  include("${rapids-cmake-dir}/export/find_package_root.cmake")
+  rapids_export_find_package_root(
+    INSTALL NvidiaCutlass [=[${CMAKE_CURRENT_LIST_DIR}/../]=] raft-distance-exports
+  )
+  rapids_export_find_package_root(
+    BUILD NvidiaCutlass [=[${CMAKE_CURRENT_LIST_DIR}]=] raft-distance-exports
+  )
+  rapids_export_find_package_root(
+    INSTALL NvidiaCutlass [=[${CMAKE_CURRENT_LIST_DIR}/../]=] raft-nn-exports
+  )
+  rapids_export_find_package_root(
+    BUILD NvidiaCutlass [=[${CMAKE_CURRENT_LIST_DIR}]=] raft-nn-exports
+  )
+endfunction()
+
+if(NOT RAFT_CUTLASS_GIT_TAG)  # default tag, applied only when the variable is unset or false-like
+ set(RAFT_CUTLASS_GIT_TAG v2.9.1)
+endif()
+
+if(NOT RAFT_CUTLASS_GIT_REPOSITORY)  # default repository, overridable the same way
+ set(RAFT_CUTLASS_GIT_REPOSITORY https://github.com/NVIDIA/cutlass.git)
+endif()
+
+find_and_configure_cutlass(  # NOTE(review): VERSION stays 2.9.1 even when the tag is overridden
+ VERSION 2.9.1 REPOSITORY ${RAFT_CUTLASS_GIT_REPOSITORY} PINNED_TAG ${RAFT_CUTLASS_GIT_TAG}
+)
diff --git a/cpp/include/raft.hpp b/cpp/include/raft.hpp
index 6a4f323c58..e80d9a85fa 100644
--- a/cpp/include/raft.hpp
+++ b/cpp/include/raft.hpp
@@ -28,10 +28,10 @@ namespace raft {
/* Function for testing RAFT include
*
- * @return message indicating RAFT has been included succesfully*/
+ * @return message indicating RAFT has been included successfully*/
inline std::string test_raft()
{
- std::string status = "RAFT Setup succesfully";
+ std::string status = "RAFT Setup successfully";
return status;
}
diff --git a/cpp/include/raft/cluster/detail/kmeans.cuh b/cpp/include/raft/cluster/detail/kmeans.cuh
index 3d71db96c5..5aa9870b46 100644
--- a/cpp/include/raft/cluster/detail/kmeans.cuh
+++ b/cpp/include/raft/cluster/detail/kmeans.cuh
@@ -939,7 +939,7 @@ void kmeans_fit(handle_t const& handle,
RAFT_LOG_DEBUG(
"KMeans.fit (Iteration-%d/%d): initialize cluster centers from "
"the ndarray array input "
- "passed to init arguement.",
+ "passed to init argument.",
seed_iter + 1,
n_init);
raft::copy(
@@ -1026,7 +1026,7 @@ void kmeans_predict(handle_t const& handle,
auto metric = params.metric;
// Allocate memory
- // Device-accessible allocation of expandable storage used as temorary buffers
+ // Device-accessible allocation of expandable storage used as temporary buffers
rmm::device_uvector workspace(0, stream);
auto weight = raft::make_device_vector(handle, n_samples);
if (sample_weight.has_value())
@@ -1223,7 +1223,7 @@ void kmeans_transform(const raft::handle_t& handle,
auto n_clusters = params.n_clusters;
auto metric = params.metric;
- // Device-accessible allocation of expandable storage used as temorary buffers
+ // Device-accessible allocation of expandable storage used as temporary buffers
rmm::device_uvector workspace(0, stream);
auto dataBatchSize = getDataBatchSize(params.batch_samples, n_samples);
diff --git a/cpp/include/raft/cluster/kmeans.cuh b/cpp/include/raft/cluster/kmeans.cuh
index 2a35c1efa0..d64815244b 100644
--- a/cpp/include/raft/cluster/kmeans.cuh
+++ b/cpp/include/raft/cluster/kmeans.cuh
@@ -271,7 +271,7 @@ void transform(const raft::handle_t& handle,
* [dim = n_samples x n_features]
* @param[in] minClusterDistance Distance for every sample to it's nearest centroid
* [dim = n_samples]
- * @param[in] isSampleCentroid Flag the sample choosen as initial centroid
+ * @param[in] isSampleCentroid Flag the sample chosen as initial centroid
* [dim = n_samples]
* @param[in] select_op The sampling operation used to select the centroids
* @param[out] inRankCp The sampled centroids
@@ -798,7 +798,7 @@ using KeyValueIndexOp = kmeans::KeyValueIndexOp;
* [dim = n_samples x n_features]
* @param[in] minClusterDistance Distance for every sample to it's nearest centroid
* [dim = n_samples]
- * @param[in] isSampleCentroid Flag the sample choosen as initial centroid
+ * @param[in] isSampleCentroid Flag the sample chosen as initial centroid
* [dim = n_samples]
* @param[in] select_op The sampling operation used to select the centroids
* @param[out] inRankCp The sampled centroids
diff --git a/cpp/include/raft/comms/detail/std_comms.hpp b/cpp/include/raft/comms/detail/std_comms.hpp
index e64c6d9bf0..33892597d8 100644
--- a/cpp/include/raft/comms/detail/std_comms.hpp
+++ b/cpp/include/raft/comms/detail/std_comms.hpp
@@ -88,7 +88,7 @@ class std_comms : public comms_iface {
/**
* @brief constructor for collective-only operation
- * @param nccl_comm initilized nccl communicator
+ * @param nccl_comm initialized nccl communicator
* @param num_ranks size of the cluster
* @param rank rank of the current worker
* @param stream stream for ordering collective operations
@@ -266,7 +266,7 @@ class std_comms : public comms_iface {
bool restart = false; // resets the timeout when any progress was made
// Causes UCP to progress through the send/recv message queue
- while (ucp_handler_.ucp_progress(ucp_worker_) != 0) {
+ while (ucp_worker_progress(ucp_worker_) != 0) {
restart = true;
}
diff --git a/cpp/include/raft/comms/detail/ucp_helper.hpp b/cpp/include/raft/comms/detail/ucp_helper.hpp
index 668acafae4..9479bc24f9 100644
--- a/cpp/include/raft/comms/detail/ucp_helper.hpp
+++ b/cpp/include/raft/comms/detail/ucp_helper.hpp
@@ -16,7 +16,6 @@
#pragma once
-#include
#include
#include
#include
@@ -26,23 +25,6 @@ namespace raft {
namespace comms {
namespace detail {
-typedef void (*dlsym_print_info)(ucp_ep_h, FILE*);
-
-typedef void (*dlsym_rec_free)(void*);
-
-typedef int (*dlsym_worker_progress)(ucp_worker_h);
-
-typedef ucs_status_ptr_t (*dlsym_send)(
- ucp_ep_h, const void*, size_t, ucp_datatype_t, ucp_tag_t, ucp_send_callback_t);
-
-typedef ucs_status_ptr_t (*dlsym_recv)(ucp_worker_h,
- void*,
- size_t count,
- ucp_datatype_t datatype,
- ucp_tag_t,
- ucp_tag_t,
- ucp_tag_recv_callback_t);
-
/**
* Standard UCX request object that will be passed
* around asynchronously. This object is really
@@ -90,79 +72,10 @@ static void recv_callback(void* request, ucs_status_t status, ucp_tag_recv_info_
}
/**
- * Helper class for managing `dlopen` state and
- * interacting with ucp.
+ * Helper class for interacting with ucp.
*/
class comms_ucp_handler {
- public:
- comms_ucp_handler()
- {
- load_ucp_handle();
- load_send_func();
- load_recv_func();
- load_free_req_func();
- load_print_info_func();
- load_worker_progress_func();
- }
-
- ~comms_ucp_handler() { dlclose(ucp_handle); }
-
private:
- void* ucp_handle;
-
- dlsym_print_info print_info_func;
- dlsym_rec_free req_free_func;
- dlsym_worker_progress worker_progress_func;
- dlsym_send send_func;
- dlsym_recv recv_func;
-
- void load_ucp_handle()
- {
- ucp_handle = dlopen("libucp.so", RTLD_LAZY | RTLD_NOLOAD | RTLD_NODELETE);
- if (!ucp_handle) {
- ucp_handle = dlopen("libucp.so", RTLD_LAZY | RTLD_NODELETE);
- ASSERT(ucp_handle, "Cannot open UCX library: %s\n", dlerror());
- }
- // Reset any potential error
- dlerror();
- }
-
- void assert_dlerror()
- {
- char* error = dlerror();
- ASSERT(error == NULL, "Error loading function symbol: %s\n", error);
- }
-
- void load_send_func()
- {
- send_func = (dlsym_send)dlsym(ucp_handle, "ucp_tag_send_nb");
- assert_dlerror();
- }
-
- void load_free_req_func()
- {
- req_free_func = (dlsym_rec_free)dlsym(ucp_handle, "ucp_request_free");
- assert_dlerror();
- }
-
- void load_print_info_func()
- {
- print_info_func = (dlsym_print_info)dlsym(ucp_handle, "ucp_ep_print_info");
- assert_dlerror();
- }
-
- void load_worker_progress_func()
- {
- worker_progress_func = (dlsym_worker_progress)dlsym(ucp_handle, "ucp_worker_progress");
- assert_dlerror();
- }
-
- void load_recv_func()
- {
- recv_func = (dlsym_recv)dlsym(ucp_handle, "ucp_tag_recv_nb");
- assert_dlerror();
- }
-
ucp_tag_t build_message_tag(int rank, int tag) const
{
// keeping the rank in the lower bits enables debugging.
@@ -170,8 +83,6 @@ class comms_ucp_handler {
}
public:
- int ucp_progress(ucp_worker_h worker) const { return (*(worker_progress_func))(worker); }
-
/**
* @brief Frees any memory underlying the given ucp request object
*/
@@ -179,7 +90,7 @@ class comms_ucp_handler {
{
if (request->needs_release) {
request->req->completed = 0;
- (*(req_free_func))(request->req);
+ ucp_request_free(request->req);
}
free(request);
}
@@ -198,7 +109,7 @@ class comms_ucp_handler {
ucp_tag_t ucp_tag = build_message_tag(rank, tag);
ucs_status_ptr_t send_result =
- (*(send_func))(ep_ptr, buf, size, ucp_dt_make_contig(1), ucp_tag, send_callback);
+ ucp_tag_send_nb(ep_ptr, buf, size, ucp_dt_make_contig(1), ucp_tag, send_callback);
struct ucx_context* ucp_req = (struct ucx_context*)send_result;
if (UCS_PTR_IS_ERR(send_result)) {
@@ -240,7 +151,7 @@ class comms_ucp_handler {
ucp_tag_t ucp_tag = build_message_tag(sender_rank, tag);
ucs_status_ptr_t recv_result =
- (*(recv_func))(worker, buf, size, ucp_dt_make_contig(1), ucp_tag, tag_mask, recv_callback);
+ ucp_tag_recv_nb(worker, buf, size, ucp_dt_make_contig(1), ucp_tag, tag_mask, recv_callback);
struct ucx_context* ucp_req = (struct ucx_context*)recv_result;
diff --git a/cpp/include/raft/core/comms.hpp b/cpp/include/raft/core/comms.hpp
index 771f38fee3..78ce91dbf2 100644
--- a/cpp/include/raft/core/comms.hpp
+++ b/cpp/include/raft/core/comms.hpp
@@ -32,7 +32,7 @@ enum class op_t { SUM, PROD, MIN, MAX };
*/
enum class status_t {
SUCCESS, // Synchronization successful
- ERROR, // An error occured querying sync status
+ ERROR, // An error occurred querying sync status
ABORT // A failure occurred in sync, queued operations aborted
};
diff --git a/cpp/include/raft/core/device_mdspan.hpp b/cpp/include/raft/core/device_mdspan.hpp
index 3386610224..f64f15d0d5 100644
--- a/cpp/include/raft/core/device_mdspan.hpp
+++ b/cpp/include/raft/core/device_mdspan.hpp
@@ -44,7 +44,6 @@ template >
using managed_mdspan = mdspan>;
-namespace detail {
template
struct is_device_mdspan : std::false_type {
};
@@ -83,22 +82,20 @@ using is_input_managed_mdspan_t = is_managed_mdspan>;
template
using is_output_managed_mdspan_t = is_managed_mdspan>;
-} // end namespace detail
-
/**
* @\brief Boolean to determine if variadic template types Tn are either raft::device_mdspan or a
* derived type
*/
template
-inline constexpr bool is_device_mdspan_v = std::conjunction_v...>;
+inline constexpr bool is_device_mdspan_v = std::conjunction_v...>;
template
inline constexpr bool is_input_device_mdspan_v =
- std::conjunction_v...>;
+ std::conjunction_v...>;
template
inline constexpr bool is_output_device_mdspan_v =
- std::conjunction_v...>;
+ std::conjunction_v...>;
template
using enable_if_device_mdspan = std::enable_if_t>;
@@ -114,15 +111,15 @@ using enable_if_output_device_mdspan = std::enable_if_t
-inline constexpr bool is_managed_mdspan_v = std::conjunction_v...>;
+inline constexpr bool is_managed_mdspan_v = std::conjunction_v...>;
template
inline constexpr bool is_input_managed_mdspan_v =
- std::conjunction_v...>;
+ std::conjunction_v...>;
template
inline constexpr bool is_output_managed_mdspan_v =
- std::conjunction_v...>;
+ std::conjunction_v...>;
template
using enable_if_managed_mdspan = std::enable_if_t>;
@@ -292,18 +289,6 @@ auto make_device_vector_view(
return device_vector_view{ptr, mapping};
}
-/**
- * @brief Create a layout_stride mapping from extents and strides
- * @param[in] extents the dimensionality of the layout
- * @param[in] strides the strides between elements in the layout
- * @return raft::layout_stride::mapping
- */
-template
-auto make_strided_layout(Extents extents, Strides strides)
-{
- return layout_stride::mapping{extents, strides};
-}
-
/**
* @brief Construct a strided vector layout mapping
*
diff --git a/cpp/include/raft/core/error.hpp b/cpp/include/raft/core/error.hpp
index 8348595db3..b932309d24 100644
--- a/cpp/include/raft/core/error.hpp
+++ b/cpp/include/raft/core/error.hpp
@@ -97,23 +97,23 @@ struct logic_error : public raft::exception {
// FIXME: Need to be replaced with RAFT_FAIL
/** macro to throw a runtime error */
-#define THROW(fmt, ...) \
- do { \
- int size1 = \
- std::snprintf(nullptr, 0, "exception occured! file=%s line=%d: ", __FILE__, __LINE__); \
- int size2 = std::snprintf(nullptr, 0, fmt, ##__VA_ARGS__); \
- if (size1 < 0 || size2 < 0) \
- throw raft::exception("Error in snprintf, cannot handle raft exception."); \
- auto size = size1 + size2 + 1; /* +1 for final '\0' */ \
- auto buf = std::make_unique(size_t(size)); \
- std::snprintf(buf.get(), \
- size1 + 1 /* +1 for '\0' */, \
- "exception occured! file=%s line=%d: ", \
- __FILE__, \
- __LINE__); \
- std::snprintf(buf.get() + size1, size2 + 1 /* +1 for '\0' */, fmt, ##__VA_ARGS__); \
- std::string msg(buf.get(), buf.get() + size - 1); /* -1 to remove final '\0' */ \
- throw raft::exception(msg); \
+#define THROW(fmt, ...) \
+ do { \
+ int size1 = \
+ std::snprintf(nullptr, 0, "exception occurred! file=%s line=%d: ", __FILE__, __LINE__); \
+ int size2 = std::snprintf(nullptr, 0, fmt, ##__VA_ARGS__); \
+ if (size1 < 0 || size2 < 0) \
+ throw raft::exception("Error in snprintf, cannot handle raft exception."); \
+ auto size = size1 + size2 + 1; /* +1 for final '\0' */ \
+ auto buf = std::make_unique(size_t(size)); \
+ std::snprintf(buf.get(), \
+ size1 + 1 /* +1 for '\0' */, \
+ "exception occurred! file=%s line=%d: ", \
+ __FILE__, \
+ __LINE__); \
+ std::snprintf(buf.get() + size1, size2 + 1 /* +1 for '\0' */, fmt, ##__VA_ARGS__); \
+ std::string msg(buf.get(), buf.get() + size - 1); /* -1 to remove final '\0' */ \
+ throw raft::exception(msg); \
} while (0)
// FIXME: Need to be replaced with RAFT_EXPECTS
@@ -148,7 +148,7 @@ struct logic_error : public raft::exception {
*
* @param[in] cond Expression that evaluates to true or false
* @param[in] fmt String literal description of the reason that cond is expected to be true with
- * optinal format tagas
+ * optional format tags
* @throw raft::logic_error if the condition evaluates to false.
*/
#define RAFT_EXPECTS(cond, fmt, ...) \
@@ -164,7 +164,7 @@ struct logic_error : public raft::exception {
* @brief Indicates that an erroneous code path has been taken.
*
* @param[in] fmt String literal description of the reason that this code path is erroneous with
- * optinal format tagas
+ * optional format tags
* @throw always throws raft::logic_error
*/
#define RAFT_FAIL(fmt, ...) \
diff --git a/cpp/include/raft/core/host_device_accessor.hpp b/cpp/include/raft/core/host_device_accessor.hpp
index 81bf015f2e..e9ebdb6c9f 100644
--- a/cpp/include/raft/core/host_device_accessor.hpp
+++ b/cpp/include/raft/core/host_device_accessor.hpp
@@ -22,7 +22,7 @@ namespace raft {
/**
* @brief A mixin to distinguish host and device memory. This is the primary
- * accessor used throught RAFT's APIs to denote whether an underlying pointer
+ * accessor used throughout RAFT's APIs to denote whether an underlying pointer
* is accessible from device, host, or both.
*/
template
diff --git a/cpp/include/raft/core/host_mdspan.hpp b/cpp/include/raft/core/host_mdspan.hpp
index d3d6c53df3..1a0ea6432f 100644
--- a/cpp/include/raft/core/host_mdspan.hpp
+++ b/cpp/include/raft/core/host_mdspan.hpp
@@ -36,8 +36,6 @@ template >
using host_mdspan = mdspan>;
-namespace detail {
-
template
struct is_host_mdspan : std::false_type {
};
@@ -57,22 +55,18 @@ using is_input_host_mdspan_t = is_host_mdspan>;
template
using is_output_host_mdspan_t = is_host_mdspan>;
-} // namespace detail
-
/**
* @\brief Boolean to determine if variadic template types Tn are either raft::host_mdspan or a
* derived type
*/
template
-inline constexpr bool is_host_mdspan_v = std::conjunction_v...>;
+inline constexpr bool is_host_mdspan_v = std::conjunction_v...>;
template
-inline constexpr bool is_input_host_mdspan_v =
- std::conjunction_v...>;
+inline constexpr bool is_input_host_mdspan_v = std::conjunction_v...>;
template
-inline constexpr bool is_output_host_mdspan_v =
- std::conjunction_v...>;
+inline constexpr bool is_output_host_mdspan_v = std::conjunction_v...>;
template
using enable_if_host_mdspan = std::enable_if_t>;
diff --git a/cpp/include/raft/core/mdspan.hpp b/cpp/include/raft/core/mdspan.hpp
index db131ff6fa..786ce69f89 100644
--- a/cpp/include/raft/core/mdspan.hpp
+++ b/cpp/include/raft/core/mdspan.hpp
@@ -194,26 +194,15 @@ auto make_mdspan(ElementType* ptr, extents exts)
}
/**
- * @brief Create a raft::mdspan
- * @tparam ElementType the data type of the matrix elements
- * @tparam IndexType the index type of the extents
- * @tparam LayoutPolicy policy for strides and layout ordering
- * @tparam MemType the raft::memory_type for where the data are stored
- * @param ptr Pointer to the data
- * @param exts dimensionality of the array (series of integers)
- * @return raft::mdspan
+ * @brief Create a layout_stride mapping from extents and strides
+ * @param[in] extents the dimensionality of the layout
+ * @param[in] strides the strides between elements in the layout
+ * @return raft::layout_stride::mapping
*/
-template
-auto make_mdspan(ElementType* ptr, extents exts)
+template
+auto make_strided_layout(Extents extents, Strides strides)
{
- using accessor_type =
- host_device_accessor, MemType>;
-
- return mdspan{ptr, exts};
+ return layout_stride::mapping{extents, strides};
}
/**
diff --git a/cpp/include/raft/core/nvtx.hpp b/cpp/include/raft/core/nvtx.hpp
index 3dbe1dd511..09a41f10a6 100644
--- a/cpp/include/raft/core/nvtx.hpp
+++ b/cpp/include/raft/core/nvtx.hpp
@@ -32,7 +32,7 @@
* \code{.cpp}
* #include
* void some_function(int k){
- * // Begins a NVTX range with the messsage "some_function_{k}"
+ * // Begins a NVTX range with the message "some_function_{k}"
* // The range ends when some_function() returns
* common::nvtx::range fun_scope( r{"some_function_%d", k};
*
diff --git a/cpp/include/raft/distance/detail/cosine.cuh b/cpp/include/raft/distance/detail/cosine.cuh
index b7eed3e2a8..f06051962f 100644
--- a/cpp/include/raft/distance/detail/cosine.cuh
+++ b/cpp/include/raft/distance/detail/cosine.cuh
@@ -17,12 +17,23 @@
#pragma once
#include
+#include
#include
namespace raft {
namespace distance {
namespace detail {
+template
+struct CosineOp {
+ __device__ CosineOp() noexcept {}
+ __device__ AccT operator()(DataT& aNorm, const DataT& bNorm, DataT& accVal) const noexcept
+ {
+ return static_cast(1.0) - (AccT)(accVal / (aNorm * bNorm));
+ }
+ __device__ AccT operator()(DataT aData) const noexcept { return aData; }
+};
+
/**
* @brief the cosine distance matrix calculation implementer
* It computes the following equation:
@@ -71,61 +82,74 @@ void cosineImpl(const DataT* x,
FinalLambda fin_op,
cudaStream_t stream)
{
- typedef typename raft::linalg::Policy4x4::Policy RowPolicy;
- typedef typename raft::linalg::Policy4x4::ColPolicy ColPolicy;
+#if (__CUDACC_VER_MAJOR__ < 12)
+ const auto deviceVersion = getComputeCapability();
+ if (deviceVersion.first >= 8) {
+ using CosineOp_ = CosineOp;
+ CosineOp_ cosine_dist_op;
+
+ cutlassDistanceKernel(
+ x, y, xn, yn, m, n, k, lda, ldb, ldd, dOutput, fin_op, cosine_dist_op, stream);
- typedef typename std::conditional::type KPolicy;
+ } else
+#endif
+ {
+ typedef typename raft::linalg::Policy4x4::Policy RowPolicy;
+ typedef typename raft::linalg::Policy4x4::ColPolicy ColPolicy;
- dim3 blk(KPolicy::Nthreads);
+ typedef typename std::conditional::type KPolicy;
- // Accumulation operation lambda
- auto core_lambda = [] __device__(AccT & acc, DataT & x, DataT & y) { acc += x * y; };
+ dim3 blk(KPolicy::Nthreads);
- // epilogue operation lambda for final value calculation
- auto epilog_lambda = [] __device__(AccT acc[KPolicy::AccRowsPerTh][KPolicy::AccColsPerTh],
- DataT * regxn,
- DataT * regyn,
- IdxT gridStrideX,
- IdxT gridStrideY) {
+ // Accumulation operation lambda
+ auto core_lambda = [] __device__(AccT & acc, DataT & x, DataT & y) { acc += x * y; };
+
+ // epilogue operation lambda for final value calculation
+ auto epilog_lambda = [] __device__(AccT acc[KPolicy::AccRowsPerTh][KPolicy::AccColsPerTh],
+ DataT * regxn,
+ DataT * regyn,
+ IdxT gridStrideX,
+ IdxT gridStrideY) {
#pragma unroll
- for (int i = 0; i < KPolicy::AccRowsPerTh; ++i) {
+ for (int i = 0; i < KPolicy::AccRowsPerTh; ++i) {
#pragma unroll
- for (int j = 0; j < KPolicy::AccColsPerTh; ++j) {
- acc[i][j] = acc[i][j] / (regxn[i] * regyn[j]);
+ for (int j = 0; j < KPolicy::AccColsPerTh; ++j) {
+ acc[i][j] = 1.0 - (acc[i][j] / (regxn[i] * regyn[j]));
+ }
}
- }
- };
+ };
- constexpr size_t shmemSize =
- KPolicy::SmemSize + ((KPolicy::Mblk + KPolicy::Nblk) * sizeof(DataT));
- if (isRowMajor) {
- auto cosineRowMajor = pairwiseDistanceMatKernel;
- dim3 grid = launchConfigGenerator(m, n, shmemSize, cosineRowMajor);
- cosineRowMajor<<>>(
- x, y, xn, yn, m, n, k, lda, ldb, ldd, dOutput, core_lambda, epilog_lambda, fin_op);
- } else {
- auto cosineColMajor = pairwiseDistanceMatKernel;
- dim3 grid = launchConfigGenerator(m, n, shmemSize, cosineColMajor);
- cosineColMajor<<>>(
- x, y, xn, yn, m, n, k, lda, ldb, ldd, dOutput, core_lambda, epilog_lambda, fin_op);
+ constexpr size_t shmemSize =
+ KPolicy::SmemSize + ((KPolicy::Mblk + KPolicy::Nblk) * sizeof(DataT));
+ if (isRowMajor) {
+ auto cosineRowMajor = pairwiseDistanceMatKernelPriorToAmpere;
+ dim3 grid = launchConfigGenerator(m, n, shmemSize, cosineRowMajor);
+ cosineRowMajor<<>>(
+ x, y, xn, yn, m, n, k, lda, ldb, ldd, dOutput, core_lambda, epilog_lambda, fin_op);
+ } else {
+ auto cosineColMajor = pairwiseDistanceMatKernelPriorToAmpere;
+ dim3 grid = launchConfigGenerator(m, n, shmemSize, cosineColMajor);
+ cosineColMajor<<>>(
+ x, y, xn, yn, m, n, k, lda, ldb, ldd, dOutput, core_lambda, epilog_lambda, fin_op);
+ }
}
RAFT_CUDA_TRY(cudaGetLastError());
@@ -207,13 +231,11 @@ void cosineAlgo1(Index_ m,
{
auto norm_op = [] __device__(AccType in) { return raft::mySqrt(in); };
- // Wrap fin_op to allow computing 1 - pA before calling fin_op
- auto wrapped_fin_op = [fin_op] __device__(AccType d_val, Index_ g_d_idx) {
- return fin_op(static_cast(1.0) - d_val, g_d_idx);
- };
-
- typedef std::is_same is_bool;
- typedef typename std::conditional::type CosOutType;
+ // raft distance supports inputs as float/double and output as uint8_t/float/double.
+ static_assert(!((sizeof(OutType) > 1) && (sizeof(AccType) != sizeof(OutType))),
+ "OutType can be uint8_t, float, double,"
+ "if sizeof(OutType) > 1 then sizeof(AccType) == sizeof(OutType).");
+ typedef typename std::conditional::type CosOutType;
CosOutType* pDcast = reinterpret_cast(pD);
ASSERT(
@@ -234,12 +256,12 @@ void cosineAlgo1(Index_ m,
if (isRowMajor) {
lda = k, ldb = k, ldd = n;
- cosine(
- m, n, k, lda, ldb, ldd, pA, pB, col_vec, row_vec, pDcast, wrapped_fin_op, stream);
+ cosine(
+ m, n, k, lda, ldb, ldd, pA, pB, col_vec, row_vec, pDcast, fin_op, stream);
} else {
lda = n, ldb = m, ldd = m;
- cosine(
- n, m, k, lda, ldb, ldd, pB, pA, row_vec, col_vec, pDcast, wrapped_fin_op, stream);
+ cosine(
+ n, m, k, lda, ldb, ldd, pB, pA, row_vec, col_vec, pDcast, fin_op, stream);
}
}
diff --git a/cpp/include/raft/distance/detail/distance.cuh b/cpp/include/raft/distance/detail/distance.cuh
index fa0c7a48cc..b459c73bee 100644
--- a/cpp/include/raft/distance/detail/distance.cuh
+++ b/cpp/include/raft/distance/detail/distance.cuh
@@ -615,6 +615,19 @@ void distance(const InType* x,
* @note if workspace is passed as nullptr, this will return in
* worksize, the number of bytes of workspace required
*/
+
+// Default final op functor which facilitates elementwise operation on
+// final distance value if any.
+template
+struct default_fin_op {
+ __host__ __device__ default_fin_op() noexcept {};
+ // functor signature.
+ __host__ __device__ OutType operator()(AccType d_val, Index g_d_idx) const noexcept
+ {
+ return d_val;
+ }
+};
+
template (
- x, y, dist, m, n, k, workspace, worksize, default_fin_op, stream, isRowMajor, metric_arg);
+ using final_op_type = default_fin_op;
+ final_op_type fin_op;
+
+ // raft distance supports inputs as float/double and output as uint8_t/float/double.
+ static_assert(!((sizeof(OutType) > 1) && (sizeof(AccType) != sizeof(OutType))),
+ "OutType can be uint8_t, float, double,"
+ "if sizeof(OutType) > 1 then sizeof(AccType) == sizeof(OutType).");
+ distance(
+ x, y, dist, m, n, k, workspace, worksize, fin_op, stream, isRowMajor, metric_arg);
RAFT_CUDA_TRY(cudaPeekAtLastError());
}
@@ -651,7 +670,7 @@ void distance(const InType* x,
* @param n number of points in y
* @param k dimensionality
*
- * @note If the specifed distanceType doesn't need the workspace at all, it
+ * @note If the specified distanceType doesn't need the workspace at all, it
* returns 0.
*/
template
+#include
#include
namespace raft {
namespace distance {
namespace detail {
+template
+struct L2ExpandedOp {
+ bool sqrt;
+
+ __device__ L2ExpandedOp() noexcept : sqrt(false) {}
+ __device__ L2ExpandedOp(bool isSqrt) noexcept : sqrt(isSqrt) {}
+ __device__ AccT operator()(DataT& aNorm, const DataT& bNorm, DataT& accVal) const noexcept
+ {
+ AccT outVal = aNorm + bNorm - DataT(2.0) * accVal;
+ return sqrt ? raft::mySqrt(outVal) : outVal;
+ }
+
+ __device__ AccT operator()(DataT aData) const noexcept { return aData; }
+};
+
/**
* @brief the expanded euclidean distance matrix calculation implementer
* It computes the following equation: C = op(A^2 + B^2 - 2AB)
@@ -71,71 +88,85 @@ void euclideanExpImpl(const DataT* x,
FinalLambda fin_op,
cudaStream_t stream)
{
- typedef typename raft::linalg::Policy4x4::Policy RowPolicy;
- typedef typename raft::linalg::Policy4x4::ColPolicy ColPolicy;
+#if (__CUDACC_VER_MAJOR__ < 12)
+ const auto deviceVersion = getComputeCapability();
+ if (deviceVersion.first >= 8) {
+ using L2Op = L2ExpandedOp;
+ L2Op L2_dist_op(sqrt);
- typedef typename std::conditional::type KPolicy;
+ cutlassDistanceKernel(
+ x, y, xn, yn, m, n, k, lda, ldb, ldd, dOutput, fin_op, L2_dist_op, stream);
- dim3 blk(KPolicy::Nthreads);
+ } else
+#endif
+ {
- // Accumulation operation lambda
- auto core_lambda = [] __device__(AccT & acc, DataT & x, DataT & y) { acc += x * y; };
+ typedef typename raft::linalg::Policy4x4::Policy RowPolicy;
+ typedef typename raft::linalg::Policy4x4::ColPolicy ColPolicy;
- // epilogue operation lambda for final value calculation
- auto epilog_lambda = [sqrt] __device__(AccT acc[KPolicy::AccRowsPerTh][KPolicy::AccColsPerTh],
- DataT * regxn,
- DataT * regyn,
- IdxT gridStrideX,
- IdxT gridStrideY) {
+ typedef typename std::conditional::type KPolicy;
+
+ dim3 blk(KPolicy::Nthreads);
+
+ // Accumulation operation lambda
+ auto core_lambda = [] __device__(AccT & acc, DataT & x, DataT & y) { acc += x * y; };
+
+ // epilogue operation lambda for final value calculation
+ auto epilog_lambda = [sqrt] __device__(AccT acc[KPolicy::AccRowsPerTh][KPolicy::AccColsPerTh],
+ DataT * regxn,
+ DataT * regyn,
+ IdxT gridStrideX,
+ IdxT gridStrideY) {
#pragma unroll
- for (int i = 0; i < KPolicy::AccRowsPerTh; ++i) {
+ for (int i = 0; i < KPolicy::AccRowsPerTh; ++i) {
#pragma unroll
- for (int j = 0; j < KPolicy::AccColsPerTh; ++j) {
- acc[i][j] = regxn[i] + regyn[j] - (DataT)2.0 * acc[i][j];
+ for (int j = 0; j < KPolicy::AccColsPerTh; ++j) {
+ acc[i][j] = regxn[i] + regyn[j] - (DataT)2.0 * acc[i][j];
+ }
}
- }
- if (sqrt) {
+ if (sqrt) {
#pragma unroll
- for (int i = 0; i < KPolicy::AccRowsPerTh; ++i) {
+ for (int i = 0; i < KPolicy::AccRowsPerTh; ++i) {
#pragma unroll
- for (int j = 0; j < KPolicy::AccColsPerTh; ++j) {
- acc[i][j] = raft::mySqrt(acc[i][j]);
+ for (int j = 0; j < KPolicy::AccColsPerTh; ++j) {
+ acc[i][j] = raft::mySqrt(acc[i][j]);
+ }
}
}
- }
- };
+ };
- constexpr size_t shmemSize =
- KPolicy::SmemSize + ((KPolicy::Mblk + KPolicy::Nblk) * sizeof(DataT));
- if (isRowMajor) {
- auto euclideanExpRowMajor = pairwiseDistanceMatKernel;
- dim3 grid = launchConfigGenerator(m, n, shmemSize, euclideanExpRowMajor);
-
- euclideanExpRowMajor<<>>(
- x, y, xn, yn, m, n, k, lda, ldb, ldd, dOutput, core_lambda, epilog_lambda, fin_op);
- } else {
- auto euclideanExpColMajor = pairwiseDistanceMatKernel;
- dim3 grid = launchConfigGenerator(m, n, shmemSize, euclideanExpColMajor);
- euclideanExpColMajor<<