diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 7879f22879..5f80d8cfda 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -52,7 +52,6 @@ jobs:
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
- skip_upload_pkgs: libraft-template
docs-build:
if: github.ref_type == 'branch'
needs: python-build
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index e349b25ce6..965943e726 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -43,16 +43,8 @@ jobs:
- '!README.md'
- '!docs/**'
- '!img/**'
- - '!notebooks/**'
- '!python/**'
- '!thirdparty/LICENSES/**'
- test_notebooks:
- - '**'
- - '!.devcontainer/**'
- - '!.pre-commit-config.yaml'
- - '!CONTRIBUTING.md'
- - '!README.md'
- - '!thirdparty/LICENSES/**'
test_python:
- '**'
- '!.devcontainer/**'
@@ -61,7 +53,6 @@ jobs:
- '!README.md'
- '!docs/**'
- '!img/**'
- - '!notebooks/**'
- '!thirdparty/LICENSES/**'
checks:
secrets: inherit
@@ -89,7 +80,6 @@ jobs:
with:
build_type: pull-request
enable_check_symbols: true
- symbol_exclusions: raft_cutlass
conda-python-build:
needs: conda-cpp-build
secrets: inherit
@@ -151,5 +141,5 @@ jobs:
cuda: '["12.5"]'
build_command: |
sccache -z;
- build-all -DBUILD_PRIMS_BENCH=ON -DBUILD_ANN_BENCH=ON --verbose;
+ build-all -DBUILD_PRIMS_BENCH=ON --verbose;
sccache -s;
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 1ae093bc56..178c6f677c 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -23,7 +23,6 @@ jobs:
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
enable_check_symbols: true
- symbol_exclusions: raft_cutlass
conda-cpp-tests:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.02
diff --git a/.gitignore b/.gitignore
index 11b7bc3eba..3d6c84a83f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,7 +25,6 @@ log
dask-worker-space/
*.egg-info/
*.bin
-bench/ann/data
temporary_*.json
## scikit-build
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 5a5342a74e..e3b3c8c440 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -62,7 +62,7 @@ repos:
entry: ./cpp/scripts/run-cmake-format.sh cmake-format
language: python
types: [cmake]
- exclude: .*/thirdparty/.*|.*FindAVX.cmake.*
+ exclude: .*/thirdparty/.*
# Note that pre-commit autoupdate does not update the versions
# of dependencies, so we'll have to update this manually.
additional_dependencies:
@@ -93,7 +93,10 @@ repos:
- id: codespell
additional_dependencies: [tomli]
args: ["--toml", "pyproject.toml"]
- exclude: (?x)^(^CHANGELOG.md$)
+ exclude: |
+ (?x)
+ ^CHANGELOG[.]md$|
+ ^cpp/cmake/patches/cutlass/build-export[.]patch$
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
hooks:
@@ -114,7 +117,6 @@ repos:
cpp/include/raft/neighbors/detail/faiss_select/|
cpp/include/raft/thirdparty/|
docs/source/sphinxext/github_link[.]py|
- cpp/cmake/modules/FindAVX[.]cmake
- id: verify-alpha-spec
- repo: https://github.com/rapidsai/dependency-file-generator
rev: v1.16.0
diff --git a/README.md b/README.md
index 8d16fc5842..898c5c22c3 100755
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
#
RAFT: Reusable Accelerated Functions and Tools for Vector Search and More
> [!IMPORTANT]
-> The vector search and clustering algorithms in RAFT are being migrated to a new library dedicated to vector search called [cuVS](https://github.com/rapidsai/cuvs). We will continue to support the vector search algorithms in RAFT during this move, but will no longer update them after the RAPIDS 24.06 (June) release. We plan to complete the migration by RAPIDS 24.10 (October) release and will be removing them altogether in the 24.12 (December) release.
+> The vector search and clustering algorithms in RAFT have been formally migrated to a new library dedicated to vector search called [cuVS](https://github.com/rapidsai/cuvs). The headers for the vector search and clustering algorithms in RAFT will remain for a brief period, but will no longer be tested, benchmarked, included in the pre-compiled libraft binary, or otherwise updated after the 24.12 (December 2024) release. We will be removing these headers altogether in a future release. We strongly suggest using cuVS for these routines, which include any headers in the `distance`, `neighbors`, `cluster` and `spatial` directories; use the RAFT versions at your own risk.
![RAFT tech stack](img/raft-tech-stack-vss.png)
@@ -27,7 +27,6 @@
- [RAFT Reference Documentation](https://docs.rapids.ai/api/raft/stable/): API Documentation.
- [RAFT Getting Started](./docs/source/quick_start.md): Getting started with RAFT.
- [Build and Install RAFT](./docs/source/build.md): Instructions for installing and building RAFT.
-- [Example Notebooks](./notebooks): Example jupyter notebooks
- [RAPIDS Community](https://rapids.ai/community.html): Get help, contribute, and collaborate.
- [GitHub repository](https://github.com/rapidsai/raft): Download the RAFT source code.
- [Issue tracker](https://github.com/rapidsai/raft/issues): Report issues or request features.
@@ -120,13 +119,13 @@ auto metric = raft::distance::DistanceType::L2SqrtExpanded;
raft::distance::pairwise_distance(handle, input.view(), input.view(), output.view(), metric);
```
-It's also possible to create `raft::device_mdspan` views to invoke the same API with raw pointers and shape information:
+It's also possible to create `raft::device_mdspan` views to invoke the same API with raw pointers and shape information. Take this example from the [NVIDIA cuVS](https://github.com/rapidsai/cuvs) library:
```c++
#include
#include
#include
-#include
+#include
raft::device_resources handle;
@@ -147,8 +146,8 @@ auto output_view = raft::make_device_matrix_view(output, n_samples, n_samples);
raft::random::make_blobs(handle, input_view, labels_view);
-auto metric = raft::distance::DistanceType::L2SqrtExpanded;
-raft::distance::pairwise_distance(handle, input_view, input_view, output_view, metric);
+auto metric = cuvs::distance::DistanceType::L2SqrtExpanded;
+cuvs::distance::pairwise_distance(handle, input_view, input_view, output_view, metric);
```
@@ -156,12 +155,12 @@ raft::distance::pairwise_distance(handle, input_view, input_view, output_view, m
The `pylibraft` package contains a Python API for RAFT algorithms and primitives. `pylibraft` integrates nicely into other libraries by being very lightweight with minimal dependencies and accepting any object that supports the `__cuda_array_interface__`, such as [CuPy's ndarray](https://docs.cupy.dev/en/stable/user_guide/interoperability.html#rmm). The number of RAFT algorithms exposed in this package is continuing to grow from release to release.
-The example below demonstrates computing the pairwise Euclidean distances between CuPy arrays. Note that CuPy is not a required dependency for `pylibraft`.
+The example below demonstrates computing the pairwise Euclidean distances between CuPy arrays using the [NVIDIA cuVS](https://github.com/rapidsai/cuvs) library. Note that CuPy is not a required dependency for `pylibraft`.
```python
import cupy as cp
-from pylibraft.distance import pairwise_distance
+from cuvs.distance import pairwise_distance
n_samples = 5000
n_features = 50
@@ -208,7 +207,7 @@ pylibraft.config.set_output_as(lambda device_ndarray: return device_ndarray.copy
```python
import cupy as cp
-from pylibraft.distance import pairwise_distance
+from cuvs.distance import pairwise_distance
n_samples = 5000
n_features = 50
@@ -223,18 +222,15 @@ pairwise_distance(in1, in2, out=output, metric="euclidean")
## Installing
-RAFT's C++ and Python libraries can both be installed through Conda and the Python libraries through Pip.
+RAFT's C++ and Python libraries can both be installed through Conda and the Python libraries through Pip.
### Installing C++ and Python through Conda
The easiest way to install RAFT is through conda and several packages are provided.
- `libraft-headers` C++ headers
-- `libraft` (optional) C++ shared library containing pre-compiled template instantiations and runtime API.
- `pylibraft` (optional) Python library
- `raft-dask` (optional) Python library for deployment of multi-node multi-GPU algorithms that use the RAFT `raft::comms` abstraction layer in Dask clusters.
-- `raft-ann-bench` (optional) Benchmarking tool for easily producing benchmarks that compare RAFT's vector search algorithms against other state-of-the-art implementations.
-- `raft-ann-bench-cpu` (optional) Reproducible benchmarking tool similar to above, but doesn't require CUDA to be installed on the machine. Can be used to test in environments with competitive CPUs.
Use the following command, depending on your CUDA version, to install all of the RAFT packages with conda (replace `rapidsai` with `rapidsai-nightly` to install more up-to-date but less stable nightly packages). `mamba` is preferred over the `conda` command.
```bash
@@ -255,8 +251,6 @@ You can also install the conda packages individually using the `mamba` command a
mamba install -c rapidsai -c conda-forge -c nvidia libraft libraft-headers cuda-version=12.5
```
-If installing the C++ APIs please see [using libraft](https://docs.rapids.ai/api/raft/nightly/using_libraft/) for more information on using the pre-compiled shared library. You can also refer to the [example C++ template project](https://github.com/rapidsai/raft/tree/branch-25.02/cpp/template) for a ready-to-go CMake configuration that you can drop into your project and build against installed RAFT development artifacts above.
-
### Installing Python through Pip
`pylibraft` and `raft-dask` both have experimental packages that can be [installed through pip](https://rapids.ai/pip.html#install):
@@ -265,12 +259,10 @@ pip install pylibraft-cu11 --extra-index-url=https://pypi.nvidia.com
pip install raft-dask-cu11 --extra-index-url=https://pypi.nvidia.com
```
-These packages statically build RAFT's pre-compiled instantiations and so the C++ headers and pre-compiled shared library won't be readily available to use in your code.
+These packages statically build RAFT's pre-compiled instantiations and so the C++ headers won't be readily available to use in your code.
The [build instructions](https://docs.rapids.ai/api/raft/nightly/build/) contain more details on building RAFT from source and including it in downstream projects. You can also find a more comprehensive version of the above CPM code snippet the [Building RAFT C++ and Python from source](https://docs.rapids.ai/api/raft/nightly/build/#building-c-and-python-from-source) section of the build instructions.
-You can find an example [RAFT project template](cpp/template/README.md) in the `cpp/template` directory, which demonstrates how to build a new application with RAFT or incorporate RAFT into an existing CMake project.
-
## Contributing
@@ -284,7 +276,7 @@ When citing RAFT generally, please consider referencing this Github project.
title={Rapidsai/raft: RAFT contains fundamental widely-used algorithms and primitives for data science, Graph and machine learning.},
url={https://github.com/rapidsai/raft},
journal={GitHub},
- publisher={Nvidia RAPIDS},
+ publisher={NVIDIA RAPIDS},
author={Rapidsai},
year={2022}
}
diff --git a/build.sh b/build.sh
index feb2d7256e..a95cb8ee23 100755
--- a/build.sh
+++ b/build.sh
@@ -18,8 +18,8 @@ ARGS=$*
# scripts, and that this script resides in the repo dir!
REPODIR=$(cd $(dirname $0); pwd)
-VALIDARGS="clean libraft pylibraft raft-dask docs tests template bench-prims bench-ann clean --uninstall -v -g -n --compile-lib --compile-static-lib --allgpuarch --no-nvtx --cpu-only --show_depr_warn --incl-cache-stats --time -h"
-HELP="$0 [ ...] [ ...] [--cmake-args=\"\"] [--cache-tool=] [--limit-tests=] [--limit-bench-prims=] [--limit-bench-ann=] [--build-metrics=]
+VALIDARGS="clean libraft pylibraft raft-dask docs tests bench-prims clean --uninstall -v -g -n --compile-lib --compile-static-lib --allgpuarch --no-nvtx --show_depr_warn --incl-cache-stats --time -h"
+HELP="$0 [ ...] [ ...] [--cmake-args=\"\"] [--cache-tool=] [--limit-tests=] [--limit-bench-prims=] [--build-metrics=]
where is:
clean - remove all existing build artifacts and configuration (start over)
libraft - build the raft C++ code only. Also builds the C-wrapper library
@@ -29,8 +29,6 @@ HELP="$0 [ ...] [ ...] [--cmake-args=\"\"] [--cache-tool= is:
-v - verbose build mode
@@ -39,10 +37,8 @@ HELP="$0 [ ...] [ ...] [--cmake-args=\"\"] [--cache-tool==1.8.2
-- c-compiler
-- clang-tools=16.0.6
-- clang==16.0.6
-- cmake>=3.26.4,!=3.30.0
-- cuda-nvtx=11.8
-- cuda-profiler-api=11.8.86
-- cuda-version=11.8
-- cudatoolkit
-- cxx-compiler
-- cython>=3.0.0,<3.1.0a0
-- gcc_linux-aarch64=11.*
-- glog>=0.6.0
-- h5py>=3.8.0
-- hnswlib=0.7.0
-- libcublas-dev=11.11.3.6
-- libcublas=11.11.3.6
-- libcurand-dev=10.3.0.86
-- libcurand=10.3.0.86
-- libcusolver-dev=11.4.1.48
-- libcusolver=11.4.1.48
-- libcusparse-dev=11.7.5.86
-- libcusparse=11.7.5.86
-- libucxx==0.42.*,>=0.0.0a0
-- matplotlib
-- nccl>=2.19
-- ninja
-- nlohmann_json>=3.11.2
-- nvcc_linux-aarch64=11.8
-- openblas
-- pandas
-- pyyaml
-- rapids-build-backend>=0.3.0,<0.4.0.dev0
-- rmm==25.2.*,>=0.0.0a0
-- scikit-build-core>=0.10.0
-- sysroot_linux-aarch64==2.17
-name: bench_ann_cuda-118_arch-aarch64
diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml
deleted file mode 100644
index 7fa432c8d6..0000000000
--- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml
+++ /dev/null
@@ -1,46 +0,0 @@
-# This file is generated by `rapids-dependency-file-generator`.
-# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
-channels:
-- rapidsai
-- rapidsai-nightly
-- dask/label/dev
-- conda-forge
-- nvidia
-dependencies:
-- benchmark>=1.8.2
-- c-compiler
-- clang-tools=16.0.6
-- clang==16.0.6
-- cmake>=3.26.4,!=3.30.0
-- cuda-nvtx=11.8
-- cuda-profiler-api=11.8.86
-- cuda-version=11.8
-- cudatoolkit
-- cxx-compiler
-- cython>=3.0.0,<3.1.0a0
-- gcc_linux-64=11.*
-- glog>=0.6.0
-- h5py>=3.8.0
-- hnswlib=0.7.0
-- libcublas-dev=11.11.3.6
-- libcublas=11.11.3.6
-- libcurand-dev=10.3.0.86
-- libcurand=10.3.0.86
-- libcusolver-dev=11.4.1.48
-- libcusolver=11.4.1.48
-- libcusparse-dev=11.7.5.86
-- libcusparse=11.7.5.86
-- libucxx==0.42.*,>=0.0.0a0
-- matplotlib
-- nccl>=2.19
-- ninja
-- nlohmann_json>=3.11.2
-- nvcc_linux-64=11.8
-- openblas
-- pandas
-- pyyaml
-- rapids-build-backend>=0.3.0,<0.4.0.dev0
-- rmm==25.2.*,>=0.0.0a0
-- scikit-build-core>=0.10.0
-- sysroot_linux-64==2.17
-name: bench_ann_cuda-118_arch-x86_64
diff --git a/conda/environments/bench_ann_cuda-120_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-120_arch-aarch64.yaml
deleted file mode 100644
index 0f59fc6090..0000000000
--- a/conda/environments/bench_ann_cuda-120_arch-aarch64.yaml
+++ /dev/null
@@ -1,42 +0,0 @@
-# This file is generated by `rapids-dependency-file-generator`.
-# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
-channels:
-- rapidsai
-- rapidsai-nightly
-- dask/label/dev
-- conda-forge
-- nvidia
-dependencies:
-- benchmark>=1.8.2
-- c-compiler
-- clang-tools=16.0.6
-- clang==16.0.6
-- cmake>=3.26.4,!=3.30.0
-- cuda-cudart-dev
-- cuda-nvcc
-- cuda-nvtx-dev
-- cuda-profiler-api
-- cuda-version=12.0
-- cxx-compiler
-- cython>=3.0.0,<3.1.0a0
-- gcc_linux-aarch64=11.*
-- glog>=0.6.0
-- h5py>=3.8.0
-- hnswlib=0.7.0
-- libcublas-dev
-- libcurand-dev
-- libcusolver-dev
-- libcusparse-dev
-- libucxx==0.42.*,>=0.0.0a0
-- matplotlib
-- nccl>=2.19
-- ninja
-- nlohmann_json>=3.11.2
-- openblas
-- pandas
-- pyyaml
-- rapids-build-backend>=0.3.0,<0.4.0.dev0
-- rmm==25.2.*,>=0.0.0a0
-- scikit-build-core>=0.10.0
-- sysroot_linux-aarch64==2.17
-name: bench_ann_cuda-120_arch-aarch64
diff --git a/conda/environments/bench_ann_cuda-120_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-120_arch-x86_64.yaml
deleted file mode 100644
index 273d6a9f9b..0000000000
--- a/conda/environments/bench_ann_cuda-120_arch-x86_64.yaml
+++ /dev/null
@@ -1,42 +0,0 @@
-# This file is generated by `rapids-dependency-file-generator`.
-# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
-channels:
-- rapidsai
-- rapidsai-nightly
-- dask/label/dev
-- conda-forge
-- nvidia
-dependencies:
-- benchmark>=1.8.2
-- c-compiler
-- clang-tools=16.0.6
-- clang==16.0.6
-- cmake>=3.26.4,!=3.30.0
-- cuda-cudart-dev
-- cuda-nvcc
-- cuda-nvtx-dev
-- cuda-profiler-api
-- cuda-version=12.0
-- cxx-compiler
-- cython>=3.0.0,<3.1.0a0
-- gcc_linux-64=11.*
-- glog>=0.6.0
-- h5py>=3.8.0
-- hnswlib=0.7.0
-- libcublas-dev
-- libcurand-dev
-- libcusolver-dev
-- libcusparse-dev
-- libucxx==0.42.*,>=0.0.0a0
-- matplotlib
-- nccl>=2.19
-- ninja
-- nlohmann_json>=3.11.2
-- openblas
-- pandas
-- pyyaml
-- rapids-build-backend>=0.3.0,<0.4.0.dev0
-- rmm==25.2.*,>=0.0.0a0
-- scikit-build-core>=0.10.0
-- sysroot_linux-64==2.17
-name: bench_ann_cuda-120_arch-x86_64
diff --git a/conda/recipes/libraft/build_libraft_template.sh b/conda/recipes/libraft/build_libraft_template.sh
deleted file mode 100644
index 86c0fa11b6..0000000000
--- a/conda/recipes/libraft/build_libraft_template.sh
+++ /dev/null
@@ -1,5 +0,0 @@
-#!/usr/bin/env bash
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
-
-# Just building template so we verify it uses libraft.so and fail if it doesn't build
-./build.sh template --no-nvtx
diff --git a/conda/recipes/libraft/conda_build_config.yaml b/conda/recipes/libraft/conda_build_config.yaml
index bc0ff1fae7..4857f12cd1 100644
--- a/conda/recipes/libraft/conda_build_config.yaml
+++ b/conda/recipes/libraft/conda_build_config.yaml
@@ -19,21 +19,6 @@ c_stdlib_version:
cmake_version:
- ">=3.26.4,!=3.30.0"
-nccl_version:
- - ">=2.19"
-
-glog_version:
- - ">=0.6.0"
-
-faiss_version:
- - ">=1.7.1"
-
-h5py_version:
- - ">=3.8.0"
-
-nlohmann_json_version:
- - ">=3.11.2"
-
# The CTK libraries below are missing from the conda-forge::cudatoolkit package
# for CUDA 11. The "*_host_*" version specifiers correspond to `11.8` packages
# and the "*_run_*" version specifiers correspond to `11.x` packages.
diff --git a/conda/recipes/libraft/meta.yaml b/conda/recipes/libraft/meta.yaml
index a075308500..503c4cb6fb 100644
--- a/conda/recipes/libraft/meta.yaml
+++ b/conda/recipes/libraft/meta.yaml
@@ -322,57 +322,3 @@ outputs:
home: https://rapids.ai/
license: Apache-2.0
summary: libraft tests
- - name: libraft-template
- version: {{ version }}
- script: build_libraft_template.sh
- build:
- script_env: *script_env
- number: {{ GIT_DESCRIBE_NUMBER }}
- string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
- ignore_run_exports_from:
- {% if cuda_major == "11" %}
- - {{ compiler('cuda11') }}
- {% else %}
- - {{ compiler('cuda') }}
- - cuda-cudart-dev
- - libcublas-dev
- {% endif %}
- requirements:
- build:
- - {{ compiler('c') }}
- - {{ compiler('cxx') }}
- {% if cuda_major == "11" %}
- - {{ compiler('cuda11') }} ={{ cuda_version }}
- {% else %}
- - {{ compiler('cuda') }}
- {% endif %}
- - cuda-version ={{ cuda_version }}
- - cmake {{ cmake_version }}
- - ninja
- - {{ stdlib("c") }}
- host:
- - {{ pin_subpackage('libraft', exact=True) }}
- - {{ pin_subpackage('libraft-headers', exact=True) }}
- - cuda-version ={{ cuda_version }}
- {% if cuda_major == "11" %}
- - cuda-profiler-api {{ cuda11_cuda_profiler_api_run_version }}
- - libcublas {{ cuda11_libcublas_host_version }}
- - libcublas-dev {{ cuda11_libcublas_host_version }}
- {% else %}
- - cuda-cudart-dev
- - cuda-profiler-api
- - libcublas-dev
- {% endif %}
- run:
- - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
- {% if cuda_major == "11" %}
- - cudatoolkit
- {% else %}
- - cuda-cudart
- - libcublas
- {% endif %}
- - {{ pin_subpackage('libraft', exact=True) }}
- about:
- home: https://rapids.ai/
- license: Apache-2.0
- summary: libraft template
diff --git a/conda/recipes/raft-ann-bench-cpu/build.sh b/conda/recipes/raft-ann-bench-cpu/build.sh
deleted file mode 100644
index 4462d5124b..0000000000
--- a/conda/recipes/raft-ann-bench-cpu/build.sh
+++ /dev/null
@@ -1,5 +0,0 @@
-#!/usr/bin/env bash
-# Copyright (c) 2023, NVIDIA CORPORATION.
-
-./build.sh bench-ann --cpu-only --no-nvtx --build-metrics=bench_ann_cpu --incl-cache-stats
-cmake --install cpp/build --component ann_bench
diff --git a/conda/recipes/raft-ann-bench-cpu/conda_build_config.yaml b/conda/recipes/raft-ann-bench-cpu/conda_build_config.yaml
deleted file mode 100644
index ed6f708e14..0000000000
--- a/conda/recipes/raft-ann-bench-cpu/conda_build_config.yaml
+++ /dev/null
@@ -1,29 +0,0 @@
-c_compiler_version:
- - 11
-
-cxx_compiler_version:
- - 11
-
-c_stdlib:
- - sysroot
-
-c_stdlib_version:
- - "2.17"
-
-cmake_version:
- - ">=3.26.4,!=3.30.0"
-
-glog_version:
- - ">=0.6.0"
-
-h5py_version:
- - ">=3.8.0"
-
-nlohmann_json_version:
- - ">=3.11.2"
-
-spdlog_version:
- - ">=1.14.1,<1.15"
-
-fmt_version:
- - ">=11.0.2,<12"
diff --git a/conda/recipes/raft-ann-bench-cpu/meta.yaml b/conda/recipes/raft-ann-bench-cpu/meta.yaml
deleted file mode 100644
index 94f7102726..0000000000
--- a/conda/recipes/raft-ann-bench-cpu/meta.yaml
+++ /dev/null
@@ -1,69 +0,0 @@
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
-
-# Usage:
-# conda build . -c conda-forge -c nvidia -c rapidsai
-{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
-{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
-{% set py_version = environ['CONDA_PY'] %}
-{% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %}
-{% set date_string = environ['RAPIDS_DATE_STRING'] %}
-
-package:
- name: raft-ann-bench-cpu
- version: {{ version }}
- script: build.sh
-
-source:
- path: ../../..
-
-build:
- script_env:
- - AWS_ACCESS_KEY_ID
- - AWS_SECRET_ACCESS_KEY
- - AWS_SESSION_TOKEN
- - CMAKE_C_COMPILER_LAUNCHER
- - CMAKE_CUDA_COMPILER_LAUNCHER
- - CMAKE_CXX_COMPILER_LAUNCHER
- - CMAKE_GENERATOR
- - PARALLEL_LEVEL
- - RAPIDS_ARTIFACTS_DIR
- - SCCACHE_BUCKET
- - SCCACHE_IDLE_TIMEOUT
- - SCCACHE_REGION
- - SCCACHE_S3_KEY_PREFIX=libraft-aarch64 # [aarch64]
- - SCCACHE_S3_KEY_PREFIX=libraft-linux64 # [linux64]
- - SCCACHE_S3_USE_SSL
- number: {{ GIT_DESCRIBE_NUMBER }}
- string: py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
-
-requirements:
- build:
- - {{ compiler('c') }}
- - {{ compiler('cxx') }}
- - cmake {{ cmake_version }}
- - ninja
- - {{ stdlib("c") }}
-
- host:
- - glog {{ glog_version }}
- - matplotlib
- - nlohmann_json {{ nlohmann_json_version }}
- - spdlog {{ spdlog_version }}
- - fmt {{ fmt_version }}
- - python
- - pyyaml
- - pandas
- - rapids-build-backend>=0.3.0,<0.4.0.dev0
-
- run:
- - glog {{ glog_version }}
- - h5py {{ h5py_version }}
- - matplotlib
- - python
- - pyyaml
- - pandas
- - benchmark
-about:
- home: https://rapids.ai/
- license: Apache-2.0
- summary: RAFT ANN CPU benchmarks
diff --git a/conda/recipes/raft-ann-bench/build.sh b/conda/recipes/raft-ann-bench/build.sh
deleted file mode 100644
index 00078792a1..0000000000
--- a/conda/recipes/raft-ann-bench/build.sh
+++ /dev/null
@@ -1,5 +0,0 @@
-#!/usr/bin/env bash
-# Copyright (c) 2023, NVIDIA CORPORATION.
-
-./build.sh bench-ann --allgpuarch --no-nvtx --build-metrics=bench_ann --incl-cache-stats
-cmake --install cpp/build --component ann_bench
diff --git a/conda/recipes/raft-ann-bench/conda_build_config.yaml b/conda/recipes/raft-ann-bench/conda_build_config.yaml
deleted file mode 100644
index 47bd730daf..0000000000
--- a/conda/recipes/raft-ann-bench/conda_build_config.yaml
+++ /dev/null
@@ -1,70 +0,0 @@
-c_compiler_version:
- - 11
-
-cxx_compiler_version:
- - 11
-
-cuda_compiler:
- - cuda-nvcc
-
-cuda11_compiler:
- - nvcc
-
-c_stdlib:
- - sysroot
-
-c_stdlib_version:
- - "2.17"
-
-cmake_version:
- - ">=3.26.4,!=3.30.0"
-
-nccl_version:
- - ">=2.19"
-
-glog_version:
- - ">=0.6.0"
-
-h5py_version:
- - ">=3.8.0"
-
-nlohmann_json_version:
- - ">=3.11.2"
-
-# The CTK libraries below are missing from the conda-forge::cudatoolkit package
-# for CUDA 11. The "*_host_*" version specifiers correspond to `11.8` packages
-# and the "*_run_*" version specifiers correspond to `11.x` packages.
-
-cuda11_libcublas_host_version:
- - "=11.11.3.6"
-
-cuda11_libcublas_run_version:
- - ">=11.5.2.43,<12.0.0"
-
-cuda11_libcurand_host_version:
- - "=10.3.0.86"
-
-cuda11_libcurand_run_version:
- - ">=10.2.5.43,<10.3.1"
-
-cuda11_libcusolver_host_version:
- - "=11.4.1.48"
-
-cuda11_libcusolver_run_version:
- - ">=11.2.0.43,<11.4.2"
-
-cuda11_libcusparse_host_version:
- - "=11.7.5.86"
-
-cuda11_libcusparse_run_version:
- - ">=11.6.0.43,<12.0.0"
-
-# `cuda-profiler-api` only has `11.8.0` and `12.0.0` packages for all
-# architectures. The "*_host_*" version specifiers correspond to `11.8` packages and the
-# "*_run_*" version specifiers correspond to `11.x` packages.
-
-cuda11_cuda_profiler_api_host_version:
- - "=11.8.86"
-
-cuda11_cuda_profiler_api_run_version:
- - ">=11.4.240,<12"
diff --git a/conda/recipes/raft-ann-bench/meta.yaml b/conda/recipes/raft-ann-bench/meta.yaml
deleted file mode 100644
index d6aeb5f860..0000000000
--- a/conda/recipes/raft-ann-bench/meta.yaml
+++ /dev/null
@@ -1,110 +0,0 @@
-# Copyright (c) 2022-2024, NVIDIA CORPORATION.
-
-# Usage:
-# conda build . -c conda-forge -c nvidia -c rapidsai
-{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') + environ.get('VERSION_SUFFIX', '') %}
-{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
-{% set py_version = environ['CONDA_PY'] %}
-{% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %}
-{% set cuda_major = cuda_version.split('.')[0] %}
-{% set date_string = environ['RAPIDS_DATE_STRING'] %}
-
-package:
- name: raft-ann-bench
- version: {{ version }}
- script: build.sh
-
-source:
- path: ../../..
-
-build:
- script_env:
- - AWS_ACCESS_KEY_ID
- - AWS_SECRET_ACCESS_KEY
- - AWS_SESSION_TOKEN
- - CMAKE_C_COMPILER_LAUNCHER
- - CMAKE_CUDA_COMPILER_LAUNCHER
- - CMAKE_CXX_COMPILER_LAUNCHER
- - CMAKE_GENERATOR
- - PARALLEL_LEVEL
- - RAPIDS_ARTIFACTS_DIR
- - SCCACHE_BUCKET
- - SCCACHE_IDLE_TIMEOUT
- - SCCACHE_REGION
- - SCCACHE_S3_KEY_PREFIX=libraft-aarch64 # [aarch64]
- - SCCACHE_S3_KEY_PREFIX=libraft-linux64 # [linux64]
- - SCCACHE_S3_USE_SSL
- number: {{ GIT_DESCRIBE_NUMBER }}
- string: cuda{{ cuda_major }}_py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
- ignore_run_exports_from:
- {% if cuda_major == "11" %}
- - {{ compiler('cuda11') }}
- {% else %}
- - {{ compiler('cuda') }}
- - cuda-cudart-dev
- - libcublas-dev
- {% endif %}
-
-requirements:
- build:
- - {{ compiler('c') }}
- - {{ compiler('cxx') }}
- {% if cuda_major == "11" %}
- - {{ compiler('cuda11') }} ={{ cuda_version }}
- {% else %}
- - {{ compiler('cuda') }}
- {% endif %}
- - cuda-version ={{ cuda_version }}
- - cmake {{ cmake_version }}
- - ninja
- - {{ stdlib("c") }}
-
- host:
- - python
- - libraft {{ version }}
- - cuda-version ={{ cuda_version }}
- {% if cuda_major == "11" %}
- - cuda-profiler-api {{ cuda11_cuda_profiler_api_run_version }}
- - libcublas {{ cuda11_libcublas_host_version }}
- - libcublas-dev {{ cuda11_libcublas_host_version }}
- {% else %}
- - cuda-cudart-dev
- - cuda-profiler-api
- - libcublas-dev
- {% endif %}
- - glog {{ glog_version }}
- - nlohmann_json {{ nlohmann_json_version }}
- - h5py {{ h5py_version }}
- - benchmark
- - matplotlib
- - python
- - pandas
- - pyyaml
- # rmm is needed to determine if package is gpu-enabled
- - rmm ={{ minor_version }}
- - rapids-build-backend>=0.3.0,<0.4.0.dev0
-
- run:
- - python
- - libraft {{ version }}
- - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
- {% if cuda_major == "11" %}
- - cudatoolkit
- {% else %}
- - cuda-cudart
- - libcublas
- {% endif %}
- - glog {{ glog_version }}
- - h5py {{ h5py_version }}
- - benchmark
- - glog {{ glog_version }}
- - matplotlib
- - python
- - pandas
- - pyyaml
- # rmm is needed to determine if package is gpu-enabled
- - rmm ={{ minor_version }}
-about:
- home: https://rapids.ai/
- license: Apache-2.0
- summary: RAFT ANN GPU and CPU benchmarks
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index d7eeb60b27..780f6f8581 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -17,17 +17,13 @@ include(rapids-cpm)
include(rapids-export)
include(rapids-find)
-option(BUILD_CPU_ONLY "Build CPU only components. Applies to RAFT ANN benchmarks currently" OFF)
-
# workaround for rapids_cuda_init_architectures not working for arch detection with
# enable_language(CUDA)
set(lang_list "CXX")
-if(NOT BUILD_CPU_ONLY)
- include(rapids-cuda)
- rapids_cuda_init_architectures(RAFT)
- list(APPEND lang_list "CUDA")
-endif()
+include(rapids-cuda)
+rapids_cuda_init_architectures(RAFT)
+list(APPEND lang_list "CUDA")
project(
RAFT
@@ -53,8 +49,6 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
option(BUILD_SHARED_LIBS "Build raft shared libraries" ON)
option(BUILD_TESTS "Build raft unit-tests" ON)
option(BUILD_PRIMS_BENCH "Build raft C++ benchmark tests" OFF)
-option(BUILD_ANN_BENCH "Build raft ann benchmarks" OFF)
-option(BUILD_CAGRA_HNSWLIB "Build CAGRA+hnswlib interface" ON)
option(CUDA_ENABLE_KERNELINFO "Enable kernel resource usage info" OFF)
option(CUDA_ENABLE_LINEINFO
"Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler)" OFF
@@ -68,23 +62,13 @@ option(DISABLE_OPENMP "Disable OpenMP" OFF)
option(RAFT_NVTX "Enable nvtx markers" OFF)
set(RAFT_COMPILE_LIBRARY_DEFAULT OFF)
-if((BUILD_TESTS
- OR BUILD_PRIMS_BENCH
- OR BUILD_ANN_BENCH
- )
- AND NOT BUILD_CPU_ONLY
-)
+if(BUILD_TESTS OR BUILD_PRIMS_BENCH)
set(RAFT_COMPILE_LIBRARY_DEFAULT ON)
endif()
option(RAFT_COMPILE_LIBRARY "Enable building raft shared library instantiations"
${RAFT_COMPILE_LIBRARY_DEFAULT}
)
-if(BUILD_CPU_ONLY)
- set(BUILD_SHARED_LIBS OFF)
- set(BUILD_TESTS OFF)
-endif()
-
# Needed because GoogleBenchmark changes the state of FindThreads.cmake, causing subsequent runs to
# have different values for the `Threads::Threads` target. Setting this flag ensures
# `Threads::Threads` is the same value across all builds so that cache hits occur
@@ -97,20 +81,14 @@ include(CMakeDependentOption)
message(VERBOSE "RAFT: Building optional components: ${raft_FIND_COMPONENTS}")
message(VERBOSE "RAFT: Build RAFT unit-tests: ${BUILD_TESTS}")
message(VERBOSE "RAFT: Building raft C++ benchmarks: ${BUILD_PRIMS_BENCH}")
-message(VERBOSE "RAFT: Building ANN benchmarks: ${BUILD_ANN_BENCH}")
-message(VERBOSE "RAFT: Build CPU only components: ${BUILD_CPU_ONLY}")
message(VERBOSE "RAFT: Enable detection of conda environment for dependencies: ${DETECT_CONDA_ENV}")
message(VERBOSE "RAFT: Disable depreaction warnings " ${DISABLE_DEPRECATION_WARNINGS})
message(VERBOSE "RAFT: Disable OpenMP: ${DISABLE_OPENMP}")
message(VERBOSE "RAFT: Enable kernel resource usage info: ${CUDA_ENABLE_KERNELINFO}")
message(VERBOSE "RAFT: Enable lineinfo in nvcc: ${CUDA_ENABLE_LINEINFO}")
message(VERBOSE "RAFT: Enable nvtx markers: ${RAFT_NVTX}")
-message(VERBOSE
- "RAFT: Statically link the CUDA runtime: ${CUDA_STATIC_RUNTIME}"
-)
-message(VERBOSE
- "RAFT: Statically link the CUDA math libraries: ${CUDA_STATIC_MATH_LIBRARIES}"
-)
+message(VERBOSE "RAFT: Statically link the CUDA runtime: ${CUDA_STATIC_RUNTIME}")
+message(VERBOSE "RAFT: Statically link the CUDA math libraries: ${CUDA_STATIC_MATH_LIBRARIES}")
# Set RMM logging level
set(RMM_LOGGING_LEVEL
@@ -143,21 +121,17 @@ if(CUDA_STATIC_MATH_LIBRARIES)
set(_ctk_static_suffix "_static")
endif()
-if(NOT BUILD_CPU_ONLY)
- # CUDA runtime
- rapids_cuda_init_runtime(USE_STATIC ${CUDA_STATIC_RUNTIME})
- # * find CUDAToolkit package
- # * determine GPU architectures
- # * enable the CMake CUDA language
- # * set other CUDA compilation flags
- rapids_find_package(
- CUDAToolkit REQUIRED
- BUILD_EXPORT_SET raft-exports
- INSTALL_EXPORT_SET raft-exports
- )
-else()
- add_compile_definitions(BUILD_CPU_ONLY)
-endif()
+# CUDA runtime
+rapids_cuda_init_runtime(USE_STATIC ${CUDA_STATIC_RUNTIME})
+# * find CUDAToolkit package
+# * determine GPU architectures
+# * enable the CMake CUDA language
+# * set other CUDA compilation flags
+rapids_find_package(
+ CUDAToolkit REQUIRED
+ BUILD_EXPORT_SET raft-exports
+ INSTALL_EXPORT_SET raft-exports
+)
if(NOT DISABLE_OPENMP)
rapids_find_package(
@@ -178,30 +152,24 @@ include(cmake/modules/ConfigureCUDA.cmake)
# add third party dependencies using CPM
rapids_cpm_init()
-if(NOT BUILD_CPU_ONLY)
- # CCCL before rmm/cuco so we get the right version of CCCL
- include(cmake/thirdparty/get_cccl.cmake)
- include(cmake/thirdparty/get_rmm.cmake)
- include(cmake/thirdparty/get_cutlass.cmake)
+# CCCL before rmm/cuco so we get the right version of CCCL
+include(cmake/thirdparty/get_cccl.cmake)
+include(cmake/thirdparty/get_rmm.cmake)
+include(cmake/thirdparty/get_cutlass.cmake)
- include(${rapids-cmake-dir}/cpm/cuco.cmake)
- rapids_cpm_cuco(BUILD_EXPORT_SET raft-exports INSTALL_EXPORT_SET raft-exports)
-endif()
+include(${rapids-cmake-dir}/cpm/cuco.cmake)
+rapids_cpm_cuco(BUILD_EXPORT_SET raft-exports INSTALL_EXPORT_SET raft-exports)
if(BUILD_TESTS)
include(${rapids-cmake-dir}/cpm/gtest.cmake)
rapids_cpm_gtest(BUILD_STATIC)
endif()
-if(BUILD_PRIMS_BENCH OR BUILD_ANN_BENCH)
+if(BUILD_PRIMS_BENCH)
include(${rapids-cmake-dir}/cpm/gbench.cmake)
rapids_cpm_gbench(BUILD_STATIC)
endif()
-if(BUILD_CAGRA_HNSWLIB)
- include(cmake/thirdparty/get_hnswlib.cmake)
-endif()
-
# ##################################################################################################
# * raft ---------------------------------------------------------------------
add_library(raft INTERFACE)
@@ -210,14 +178,9 @@ add_library(raft::raft ALIAS raft)
target_include_directories(
raft INTERFACE "$" "$"
)
-if(BUILD_CAGRA_HNSWLIB)
- target_link_libraries(raft INTERFACE hnswlib::hnswlib)
-endif()
-if(NOT BUILD_CPU_ONLY)
- # Keep RAFT as lightweight as possible. Only CUDA libs and rmm should be used in global target.
- target_link_libraries(raft INTERFACE rmm::rmm cuco::cuco nvidia::cutlass::cutlass CCCL::CCCL)
-endif()
+# Keep RAFT as lightweight as possible. Only CUDA libs and rmm should be used in global target.
+target_link_libraries(raft INTERFACE rmm::rmm cuco::cuco nvidia::cutlass::cutlass CCCL::CCCL)
target_compile_features(raft INTERFACE cxx_std_17 $)
target_compile_options(
@@ -300,277 +263,15 @@ if(RAFT_COMPILE_LIBRARY)
add_library(
raft_objs OBJECT
src/core/logger.cpp
- src/distance/detail/pairwise_matrix/dispatch_canberra_double_double_double_int.cu
- src/distance/detail/pairwise_matrix/dispatch_canberra_float_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_correlation_double_double_double_int.cu
- src/distance/detail/pairwise_matrix/dispatch_correlation_float_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_cosine_double_double_double_int.cu
- src/distance/detail/pairwise_matrix/dispatch_cosine_float_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_dice_double_double_double_int.cu
- src/distance/detail/pairwise_matrix/dispatch_dice_float_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_double_double_double_int.cu
- src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_float_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_double_double_double_int.cu
- src/distance/detail/pairwise_matrix/dispatch_hellinger_expanded_float_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_double_double_double_int.cu
- src/distance/detail/pairwise_matrix/dispatch_jensen_shannon_float_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_kl_divergence_double_double_double_int.cu
- src/distance/detail/pairwise_matrix/dispatch_kl_divergence_float_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_l1_double_double_double_int.cu
- src/distance/detail/pairwise_matrix/dispatch_l1_float_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_l2_expanded_double_double_double_int.cu
- src/distance/detail/pairwise_matrix/dispatch_l2_expanded_float_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_double_double_double_int.cu
- src/distance/detail/pairwise_matrix/dispatch_l2_unexpanded_float_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_l_inf_double_double_double_int.cu
- src/distance/detail/pairwise_matrix/dispatch_l_inf_float_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_double_double_double_int.cu
- src/distance/detail/pairwise_matrix/dispatch_lp_unexpanded_float_float_float_int.cu
- src/distance/detail/pairwise_matrix/dispatch_rbf.cu
- src/distance/detail/pairwise_matrix/dispatch_russel_rao_double_double_double_int.cu
- src/distance/detail/pairwise_matrix/dispatch_russel_rao_float_float_float_int.cu
- src/distance/distance.cu
- src/distance/fused_l2_nn.cu
- src/distance/fused_distance_nn.cu
src/linalg/detail/coalesced_reduction.cu
- src/matrix/detail/select_k_double_int64_t.cu
- src/matrix/detail/select_k_double_uint32_t.cu
- src/matrix/detail/select_k_float_int64_t.cu
- src/matrix/detail/select_k_float_uint32_t.cu
- src/matrix/detail/select_k_float_int32.cu
- src/matrix/detail/select_k_half_int64_t.cu
- src/matrix/detail/select_k_half_uint32_t.cu
- src/neighbors/ball_cover.cu
- src/neighbors/brute_force_fused_l2_knn_float_int64_t.cu
- src/neighbors/brute_force_knn_int64_t_float_int64_t.cu
- src/neighbors/brute_force_knn_int64_t_float_uint32_t.cu
- src/neighbors/brute_force_knn_int_float_int.cu
- src/neighbors/brute_force_knn_uint32_t_float_uint32_t.cu
- src/neighbors/brute_force_knn_index_float.cu
- src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu
- src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu
- src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu
- src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu
- src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim128_t8.cu
- src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim256_t16.cu
- src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim512_t32.cu
- src/neighbors/detail/cagra/search_multi_cta_half_uint32_dim1024_t32.cu
- src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu
- src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu
- src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu
- src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim1024_t32.cu
- src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim128_t8.cu
- src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim256_t16.cu
- src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim512_t32.cu
- src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim1024_t32.cu
- src/neighbors/detail/cagra/search_single_cta_float_uint32_dim128_t8.cu
- src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu
- src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu
- src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu
- src/neighbors/detail/cagra/search_single_cta_half_uint32_dim128_t8.cu
- src/neighbors/detail/cagra/search_single_cta_half_uint32_dim256_t16.cu
- src/neighbors/detail/cagra/search_single_cta_half_uint32_dim512_t32.cu
- src/neighbors/detail/cagra/search_single_cta_half_uint32_dim1024_t32.cu
- src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu
- src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu
- src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu
- src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim1024_t32.cu
- src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim128_t8.cu
- src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu
- src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu
- src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu
- src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim128_t8_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim256_t16_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim512_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim128_t8_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim256_t16_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim512_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim128_t8_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim256_t16_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim512_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim128_t8_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim256_t16_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim512_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_multi_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim128_t8_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim256_t16_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim512_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_float_uint32_dim1024_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim128_t8_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim256_t16_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim512_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_half_uint32_dim1024_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim128_t8_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim256_t16_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim512_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_int8_uint32_dim1024_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim128_t8_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim256_t16_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim512_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_uint8_uint32_dim1024_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim128_t8_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim256_t16_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim512_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_float_uint64_dim1024_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim128_t8_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim256_t16_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim512_t32_8pq_4subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_2subd_half.cu
- src/neighbors/detail/cagra/q_search_single_cta_half_uint64_dim1024_t32_8pq_4subd_half.cu
- src/neighbors/detail/ivf_flat_interleaved_scan_float_float_int64_t.cu
- src/neighbors/detail/ivf_flat_interleaved_scan_half_half_int64_t.cu
- src/neighbors/detail/ivf_flat_interleaved_scan_int8_t_int32_t_int64_t.cu
- src/neighbors/detail/ivf_flat_interleaved_scan_uint8_t_uint32_t_int64_t.cu
- src/neighbors/detail/ivf_flat_search.cu
- src/neighbors/detail/ivf_pq_compute_similarity_float_float.cu
- src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_false.cu
- src/neighbors/detail/ivf_pq_compute_similarity_float_fp8_true.cu
- src/neighbors/detail/ivf_pq_compute_similarity_float_half.cu
- src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_false.cu
- src/neighbors/detail/ivf_pq_compute_similarity_half_fp8_true.cu
- src/neighbors/detail/ivf_pq_compute_similarity_half_half.cu
- src/neighbors/detail/refine_host_float_float.cpp
- src/neighbors/detail/refine_host_half_float.cpp
- src/neighbors/detail/refine_host_int8_t_float.cpp
- src/neighbors/detail/refine_host_uint8_t_float.cpp
- src/neighbors/ivf_flat_build_float_int64_t.cu
- src/neighbors/ivf_flat_build_int8_t_int64_t.cu
- src/neighbors/ivf_flat_build_uint8_t_int64_t.cu
- src/neighbors/ivf_flat_extend_float_int64_t.cu
- src/neighbors/ivf_flat_extend_int8_t_int64_t.cu
- src/neighbors/ivf_flat_extend_uint8_t_int64_t.cu
- src/neighbors/ivf_flat_search_float_int64_t.cu
- src/neighbors/ivf_flat_search_int8_t_int64_t.cu
- src/neighbors/ivf_flat_search_uint8_t_int64_t.cu
- src/neighbors/ivfpq_build_float_int64_t.cu
- src/neighbors/ivfpq_build_half_int64_t.cu
- src/neighbors/ivfpq_build_int8_t_int64_t.cu
- src/neighbors/ivfpq_build_uint8_t_int64_t.cu
- src/neighbors/ivfpq_extend_float_int64_t.cu
- src/neighbors/ivfpq_extend_half_int64_t.cu
- src/neighbors/ivfpq_extend_int8_t_int64_t.cu
- src/neighbors/ivfpq_extend_uint8_t_int64_t.cu
- src/neighbors/ivfpq_search_float_int64_t.cu
- src/neighbors/ivfpq_search_half_int64_t.cu
- src/neighbors/ivfpq_search_int8_t_int64_t.cu
- src/neighbors/ivfpq_search_uint8_t_int64_t.cu
- src/neighbors/refine_float_float.cu
- src/neighbors/refine_half_float.cu
- src/neighbors/refine_int8_t_float.cu
- src/neighbors/refine_uint8_t_float.cu
- src/raft_runtime/cluster/cluster_cost.cuh
- src/raft_runtime/cluster/cluster_cost_double.cu
- src/raft_runtime/cluster/cluster_cost_float.cu
- src/raft_runtime/cluster/kmeans_fit_double.cu
- src/raft_runtime/cluster/kmeans_fit_float.cu
- src/raft_runtime/cluster/kmeans_init_plus_plus_double.cu
- src/raft_runtime/cluster/kmeans_init_plus_plus_float.cu
- src/raft_runtime/cluster/update_centroids.cuh
- src/raft_runtime/cluster/update_centroids_double.cu
- src/raft_runtime/cluster/update_centroids_float.cu
- src/raft_runtime/distance/fused_distance_min_arg.cu
- src/raft_runtime/distance/fused_l2_min_arg.cu
- src/raft_runtime/distance/pairwise_distance.cu
- src/raft_runtime/matrix/select_k_float_int64_t.cu
- src/raft_runtime/neighbors/brute_force_knn_int64_t_float.cu
- src/raft_runtime/neighbors/cagra_build.cu
- src/raft_runtime/neighbors/cagra_search.cu
- src/raft_runtime/neighbors/cagra_serialize.cu
- src/raft_runtime/neighbors/eps_neighborhood.cu
- $<$:src/raft_runtime/neighbors/hnsw.cpp>
- src/raft_runtime/neighbors/ivf_flat_build.cu
- src/raft_runtime/neighbors/ivf_flat_search.cu
- src/raft_runtime/neighbors/ivf_flat_serialize.cu
- src/raft_runtime/neighbors/ivfpq_build.cu
- src/raft_runtime/neighbors/ivfpq_deserialize.cu
- src/raft_runtime/neighbors/ivfpq_search_float_int64_t.cu
- src/raft_runtime/neighbors/ivfpq_search_int8_t_int64_t.cu
- src/raft_runtime/neighbors/ivfpq_search_uint8_t_int64_t.cu
- src/raft_runtime/neighbors/ivfpq_serialize.cu
- src/raft_runtime/neighbors/refine_d_int64_t_float.cu
- src/raft_runtime/neighbors/refine_d_int64_t_int8_t.cu
- src/raft_runtime/neighbors/refine_d_int64_t_uint8_t.cu
- src/raft_runtime/neighbors/refine_h_int64_t_float.cu
- src/raft_runtime/neighbors/refine_h_int64_t_int8_t.cu
- src/raft_runtime/neighbors/refine_h_int64_t_uint8_t.cu
src/raft_runtime/random/rmat_rectangular_generator_int64_double.cu
src/raft_runtime/random/rmat_rectangular_generator_int64_float.cu
src/raft_runtime/random/rmat_rectangular_generator_int_double.cu
src/raft_runtime/random/rmat_rectangular_generator_int_float.cu
- src/spatial/knn/detail/ball_cover/registers_eps_pass_euclidean.cu
- src/spatial/knn/detail/ball_cover/registers_pass_one_2d_dist.cu
- src/spatial/knn/detail/ball_cover/registers_pass_one_2d_euclidean.cu
- src/spatial/knn/detail/ball_cover/registers_pass_one_2d_haversine.cu
- src/spatial/knn/detail/ball_cover/registers_pass_one_3d_dist.cu
- src/spatial/knn/detail/ball_cover/registers_pass_one_3d_euclidean.cu
- src/spatial/knn/detail/ball_cover/registers_pass_one_3d_haversine.cu
- src/spatial/knn/detail/ball_cover/registers_pass_two_2d_dist.cu
- src/spatial/knn/detail/ball_cover/registers_pass_two_2d_euclidean.cu
- src/spatial/knn/detail/ball_cover/registers_pass_two_2d_haversine.cu
- src/spatial/knn/detail/ball_cover/registers_pass_two_3d_dist.cu
- src/spatial/knn/detail/ball_cover/registers_pass_two_3d_euclidean.cu
- src/spatial/knn/detail/ball_cover/registers_pass_two_3d_haversine.cu
- src/spatial/knn/detail/fused_l2_knn_int32_t_float.cu
- src/spatial/knn/detail/fused_l2_knn_int64_t_float.cu
- src/spatial/knn/detail/fused_l2_knn_uint32_t_float.cu
+ src/raft_runtime/solver/lanczos_solver_int64_double.cu
+ src/raft_runtime/solver/lanczos_solver_int64_float.cu
+ src/raft_runtime/solver/lanczos_solver_int_double.cu
+ src/raft_runtime/solver/lanczos_solver_int_float.cu
)
set_target_properties(
raft_objs
@@ -847,10 +548,3 @@ endif()
if(BUILD_PRIMS_BENCH)
add_subdirectory(bench/prims/)
endif()
-
-# ##################################################################################################
-# * build ann benchmark executable -----------------------------------------------
-
-if(BUILD_ANN_BENCH)
- add_subdirectory(bench/ann/)
-endif()
diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt
deleted file mode 100644
index 35df378438..0000000000
--- a/cpp/bench/ann/CMakeLists.txt
+++ /dev/null
@@ -1,349 +0,0 @@
-# =============================================================================
-# Copyright (c) 2023-2024, NVIDIA CORPORATION.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
-# in compliance with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software distributed under the License
-# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
-# or implied. See the License for the specific language governing permissions and limitations under
-# the License.
-# =============================================================================
-
-list(APPEND CMAKE_MODULE_PATH "${RAFT_SOURCE_DIR}")
-
-# ##################################################################################################
-# * benchmark options ------------------------------------------------------------------------------
-
-option(RAFT_ANN_BENCH_USE_FAISS_GPU_FLAT "Include faiss' brute-force knn algorithm in benchmark" ON)
-option(RAFT_ANN_BENCH_USE_FAISS_GPU_IVF_FLAT "Include faiss' ivf flat algorithm in benchmark" ON)
-option(RAFT_ANN_BENCH_USE_FAISS_GPU_IVF_PQ "Include faiss' ivf pq algorithm in benchmark" ON)
-option(RAFT_ANN_BENCH_USE_FAISS_CPU_FLAT "Include faiss' cpu brute-force algorithm in benchmark" ON)
-
-option(RAFT_ANN_BENCH_USE_FAISS_CPU_IVF_FLAT "Include faiss' cpu ivf flat algorithm in benchmark"
- ON
-)
-option(RAFT_ANN_BENCH_USE_FAISS_CPU_IVF_PQ "Include faiss' cpu ivf pq algorithm in benchmark" ON)
-option(RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT "Include raft's ivf flat algorithm in benchmark" ON)
-option(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ "Include raft's ivf pq algorithm in benchmark" ON)
-option(RAFT_ANN_BENCH_USE_RAFT_CAGRA "Include raft's CAGRA in benchmark" ON)
-option(RAFT_ANN_BENCH_USE_RAFT_BRUTE_FORCE "Include raft's brute force knn in benchmark" ON)
-option(RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB "Include raft's CAGRA in benchmark" ON)
-option(RAFT_ANN_BENCH_USE_HNSWLIB "Include hnsw algorithm in benchmark" ON)
-option(RAFT_ANN_BENCH_USE_GGNN "Include ggnn algorithm in benchmark" ON)
-option(RAFT_ANN_BENCH_SINGLE_EXE
- "Make a single executable with benchmark as shared library modules" OFF
-)
-
-# ##################################################################################################
-# * Process options ----------------------------------------------------------
-
-find_package(Threads REQUIRED)
-
-set(RAFT_ANN_BENCH_USE_FAISS ON)
-set(RAFT_FAISS_ENABLE_GPU ON)
-set(RAFT_USE_FAISS_STATIC ON)
-
-if(BUILD_CPU_ONLY)
-
- # Include necessary logging dependencies
- include(cmake/thirdparty/get_fmt)
- include(cmake/thirdparty/get_spdlog)
- set(RAFT_FAISS_ENABLE_GPU OFF)
- set(RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT OFF)
- set(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ OFF)
- set(RAFT_ANN_BENCH_USE_RAFT_CAGRA OFF)
- set(RAFT_ANN_BENCH_USE_RAFT_BRUTE_FORCE OFF)
- set(RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB OFF)
- set(RAFT_ANN_BENCH_USE_GGNN OFF)
-endif()
-
-set(RAFT_ANN_BENCH_USE_RAFT OFF)
-if(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ
- OR RAFT_ANN_BENCH_USE_RAFT_BRUTE_FORCE
- OR RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT
- OR RAFT_ANN_BENCH_USE_RAFT_CAGRA
- OR RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB
-)
- set(RAFT_ANN_BENCH_USE_RAFT ON)
-endif()
-
-# ##################################################################################################
-# * Fetch requirements -------------------------------------------------------------
-
-if(RAFT_ANN_BENCH_USE_HNSWLIB OR RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB)
- include(cmake/thirdparty/get_hnswlib)
-endif()
-
-include(cmake/thirdparty/get_nlohmann_json)
-
-if(RAFT_ANN_BENCH_USE_GGNN)
- include(cmake/thirdparty/get_ggnn)
-endif()
-
-if(RAFT_ANN_BENCH_USE_FAISS)
- include(cmake/thirdparty/get_faiss)
-endif()
-
-# ##################################################################################################
-# * Enable NVTX if available
-
-# Note: ANN_BENCH wrappers have extra NVTX code not related to raft::nvtx.They track gbench
-# benchmark cases and iterations. This is to make limited NVTX available to all algos, not just
-# raft.
-if(TARGET CUDA::nvtx3)
- set(_CMAKE_REQUIRED_INCLUDES_ORIG ${CMAKE_REQUIRED_INCLUDES})
- get_target_property(CMAKE_REQUIRED_INCLUDES CUDA::nvtx3 INTERFACE_INCLUDE_DIRECTORIES)
- unset(NVTX3_HEADERS_FOUND CACHE)
- # Check the headers explicitly to make sure the cpu-only build succeeds
- CHECK_INCLUDE_FILE_CXX(nvtx3/nvToolsExt.h NVTX3_HEADERS_FOUND)
- set(CMAKE_REQUIRED_INCLUDES ${_CMAKE_REQUIRED_INCLUDES_ORIG})
-endif()
-
-# ##################################################################################################
-# * Configure tests function-------------------------------------------------------------
-
-function(ConfigureAnnBench)
-
- set(oneValueArgs NAME)
- set(multiValueArgs PATH LINKS CXXFLAGS)
-
- if(NOT BUILD_CPU_ONLY)
- set(GPU_BUILD ON)
- endif()
-
- cmake_parse_arguments(
- ConfigureAnnBench "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}
- )
-
- set(BENCH_NAME ${ConfigureAnnBench_NAME}_ANN_BENCH)
-
- if(RAFT_ANN_BENCH_SINGLE_EXE)
- add_library(${BENCH_NAME} SHARED ${ConfigureAnnBench_PATH})
- string(TOLOWER ${BENCH_NAME} BENCH_LIB_NAME)
- set_target_properties(${BENCH_NAME} PROPERTIES OUTPUT_NAME ${BENCH_LIB_NAME})
- add_dependencies(${BENCH_NAME} ANN_BENCH)
- else()
- add_executable(${BENCH_NAME} ${ConfigureAnnBench_PATH})
- target_compile_definitions(
- ${BENCH_NAME} PRIVATE ANN_BENCH_BUILD_MAIN
- $<$:ANN_BENCH_NVTX3_HEADERS_FOUND>
- )
- target_link_libraries(
- ${BENCH_NAME} PRIVATE benchmark::benchmark $<$:CUDA::nvtx3>
- )
- endif()
-
- target_link_libraries(
- ${BENCH_NAME}
- PRIVATE raft::raft
- nlohmann_json::nlohmann_json
- ${ConfigureAnnBench_LINKS}
- Threads::Threads
- $<$:${RAFT_CTK_MATH_DEPENDENCIES}>
- $
- $
- $<$:fmt::fmt-header-only>
- $<$:spdlog::spdlog_header_only>
- )
-
- set_target_properties(
- ${BENCH_NAME}
- PROPERTIES # set target compile options
- CXX_STANDARD 17
- CXX_STANDARD_REQUIRED ON
- CUDA_STANDARD 17
- CUDA_STANDARD_REQUIRED ON
- POSITION_INDEPENDENT_CODE ON
- INTERFACE_POSITION_INDEPENDENT_CODE ON
- BUILD_RPATH "\$ORIGIN"
- INSTALL_RPATH "\$ORIGIN"
- )
-
- set(${ConfigureAnnBench_CXXFLAGS} ${RAFT_CXX_FLAGS} ${ConfigureAnnBench_CXXFLAGS})
-
- target_compile_options(
- ${BENCH_NAME} PRIVATE "$<$:${ConfigureAnnBench_CXXFLAGS}>"
- "$<$:${RAFT_CUDA_FLAGS}>"
- )
-
- if(RAFT_ANN_BENCH_USE_${ConfigureAnnBench_NAME})
- target_compile_definitions(
- ${BENCH_NAME}
- PUBLIC
- RAFT_ANN_BENCH_USE_${ConfigureAnnBench_NAME}=RAFT_ANN_BENCH_USE_${ConfigureAnnBench_NAME}
- )
- endif()
-
- target_include_directories(
- ${BENCH_NAME}
- PUBLIC "$"
- PRIVATE ${ConfigureAnnBench_INCLUDES}
- )
-
- install(
- TARGETS ${BENCH_NAME}
- COMPONENT ann_bench
- DESTINATION bin/ann
- )
-endfunction()
-
-# ##################################################################################################
-# * Configure tests-------------------------------------------------------------
-
-if(RAFT_ANN_BENCH_USE_HNSWLIB)
- ConfigureAnnBench(
- NAME HNSWLIB PATH src/hnswlib/hnswlib_benchmark.cpp LINKS hnswlib::hnswlib
- )
-
-endif()
-
-if(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ)
- ConfigureAnnBench(
- NAME
- RAFT_IVF_PQ
- PATH
- src/raft/raft_benchmark.cu
- src/raft/raft_ivf_pq.cu
- LINKS
- raft::compiled
- )
-endif()
-
-if(RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT)
- ConfigureAnnBench(
- NAME
- RAFT_IVF_FLAT
- PATH
- src/raft/raft_benchmark.cu
- src/raft/raft_ivf_flat.cu
- LINKS
- raft::compiled
- )
-endif()
-
-if(RAFT_ANN_BENCH_USE_RAFT_BRUTE_FORCE)
- ConfigureAnnBench(
- NAME RAFT_BRUTE_FORCE PATH src/raft/raft_benchmark.cu LINKS raft::compiled
- )
-endif()
-
-if(RAFT_ANN_BENCH_USE_RAFT_CAGRA)
- ConfigureAnnBench(
- NAME
- RAFT_CAGRA
- PATH
- src/raft/raft_benchmark.cu
- src/raft/raft_cagra_float.cu
- src/raft/raft_cagra_half.cu
- src/raft/raft_cagra_int8_t.cu
- src/raft/raft_cagra_uint8_t.cu
- LINKS
- raft::compiled
- )
-endif()
-
-if(RAFT_ANN_BENCH_USE_RAFT_CAGRA_HNSWLIB)
- ConfigureAnnBench(
- NAME RAFT_CAGRA_HNSWLIB PATH src/raft/raft_cagra_hnswlib.cu LINKS raft::compiled
- hnswlib::hnswlib
- )
-endif()
-
-message("RAFT_FAISS_TARGETS: ${RAFT_FAISS_TARGETS}")
-message("CUDAToolkit_LIBRARY_DIR: ${CUDAToolkit_LIBRARY_DIR}")
-if(RAFT_ANN_BENCH_USE_FAISS_CPU_FLAT)
- ConfigureAnnBench(
- NAME FAISS_CPU_FLAT PATH src/faiss/faiss_cpu_benchmark.cpp LINKS
- ${RAFT_FAISS_TARGETS}
- )
-endif()
-
-if(RAFT_ANN_BENCH_USE_FAISS_CPU_IVF_FLAT)
- ConfigureAnnBench(
- NAME FAISS_CPU_IVF_FLAT PATH src/faiss/faiss_cpu_benchmark.cpp LINKS
- ${RAFT_FAISS_TARGETS}
- )
-endif()
-
-if(RAFT_ANN_BENCH_USE_FAISS_CPU_IVF_PQ)
- ConfigureAnnBench(
- NAME FAISS_CPU_IVF_PQ PATH src/faiss/faiss_cpu_benchmark.cpp LINKS
- ${RAFT_FAISS_TARGETS}
- )
-endif()
-
-if(RAFT_ANN_BENCH_USE_FAISS_GPU_IVF_FLAT AND RAFT_FAISS_ENABLE_GPU)
- ConfigureAnnBench(
- NAME FAISS_GPU_IVF_FLAT PATH src/faiss/faiss_gpu_benchmark.cu LINKS
- ${RAFT_FAISS_TARGETS}
- )
-endif()
-
-if(RAFT_ANN_BENCH_USE_FAISS_GPU_IVF_PQ AND RAFT_FAISS_ENABLE_GPU)
- ConfigureAnnBench(
- NAME FAISS_GPU_IVF_PQ PATH src/faiss/faiss_gpu_benchmark.cu LINKS
- ${RAFT_FAISS_TARGETS}
- )
-endif()
-
-if(RAFT_ANN_BENCH_USE_FAISS_GPU_FLAT AND RAFT_FAISS_ENABLE_GPU)
- ConfigureAnnBench(
- NAME FAISS_GPU_FLAT PATH src/faiss/faiss_gpu_benchmark.cu LINKS ${RAFT_FAISS_TARGETS}
- )
-endif()
-
-if(RAFT_ANN_BENCH_USE_GGNN)
- include(cmake/thirdparty/get_glog)
- ConfigureAnnBench(NAME GGNN PATH src/ggnn/ggnn_benchmark.cu LINKS glog::glog ggnn::ggnn)
-endif()
-
-# ##################################################################################################
-# * Dynamically-loading ANN_BENCH executable -------------------------------------------------------
-if(RAFT_ANN_BENCH_SINGLE_EXE)
- add_executable(ANN_BENCH src/common/benchmark.cpp)
-
- target_include_directories(ANN_BENCH PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
-
- target_link_libraries(
- ANN_BENCH
- PRIVATE raft::raft
- nlohmann_json::nlohmann_json
- benchmark::benchmark
- dl
- -static-libgcc
- fmt::fmt-header-only
- spdlog::spdlog_header_only
- -static-libstdc++
- $<$:CUDA::nvtx3>
- )
- set_target_properties(
- ANN_BENCH
- PROPERTIES # set target compile options
- CXX_STANDARD 17
- CXX_STANDARD_REQUIRED ON
- CUDA_STANDARD 17
- CUDA_STANDARD_REQUIRED ON
- POSITION_INDEPENDENT_CODE ON
- INTERFACE_POSITION_INDEPENDENT_CODE ON
- BUILD_RPATH "\$ORIGIN"
- INSTALL_RPATH "\$ORIGIN"
- )
- target_compile_definitions(
- ANN_BENCH
- PRIVATE
- $<$:ANN_BENCH_LINK_CUDART="libcudart.so.${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}.${CUDAToolkit_VERSION_PATCH}">
- $<$:ANN_BENCH_NVTX3_HEADERS_FOUND>
- )
-
- target_link_options(ANN_BENCH PRIVATE -export-dynamic)
-
- install(
- TARGETS ANN_BENCH
- COMPONENT ann_bench
- DESTINATION bin/ann
- EXCLUDE_FROM_ALL
- )
-endif()
diff --git a/cpp/bench/ann/README.md b/cpp/bench/ann/README.md
deleted file mode 100644
index 1a8af2e448..0000000000
--- a/cpp/bench/ann/README.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# RAFT CUDA ANN Benchmarks
-
-Please see the [ANN Benchmarks](https://docs.rapids.ai/api/raft/stable/cuda_ann_benchmarks.html) section of the RAFT documentation for instructions on building and using the ANN benchmarks.
\ No newline at end of file
diff --git a/cpp/bench/ann/src/common/ann_types.hpp b/cpp/bench/ann/src/common/ann_types.hpp
deleted file mode 100644
index b010063dee..0000000000
--- a/cpp/bench/ann/src/common/ann_types.hpp
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- * Copyright (c) 2023-2024, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#pragma once
-
-#include "cuda_stub.hpp" // cudaStream_t
-
-#include
-#include
-#include
-#include
-
-namespace raft::bench::ann {
-
-enum Objective {
- THROUGHPUT, // See how many vectors we can push through
- LATENCY // See how fast we can push a vector through
-};
-
-enum class MemoryType {
- Host,
- HostMmap,
- Device,
-};
-
-enum class Metric {
- kInnerProduct,
- kEuclidean,
-};
-
-inline auto parse_metric(const std::string& metric_str) -> Metric
-{
- if (metric_str == "inner_product") {
- return raft::bench::ann::Metric::kInnerProduct;
- } else if (metric_str == "euclidean") {
- return raft::bench::ann::Metric::kEuclidean;
- } else {
- throw std::runtime_error("invalid metric: '" + metric_str + "'");
- }
-}
-
-inline auto parse_memory_type(const std::string& memory_type) -> MemoryType
-{
- if (memory_type == "host") {
- return MemoryType::Host;
- } else if (memory_type == "mmap") {
- return MemoryType::HostMmap;
- } else if (memory_type == "device") {
- return MemoryType::Device;
- } else {
- throw std::runtime_error("invalid memory type: '" + memory_type + "'");
- }
-}
-
-struct AlgoProperty {
- MemoryType dataset_memory_type;
- // neighbors/distances should have same memory type as queries
- MemoryType query_memory_type;
-};
-
-class AnnBase {
- public:
- using index_type = size_t;
-
- inline AnnBase(Metric metric, int dim) : metric_(metric), dim_(dim) {}
- virtual ~AnnBase() noexcept = default;
-
- protected:
- Metric metric_;
- int dim_;
-};
-
-/**
- * The GPU-based algorithms, which do not perform CPU synchronization at the end of their build or
- * search methods, must implement this interface.
- *
- * The `cuda_timer` / `cuda_lap` from `util.hpp` uses this stream to record GPU times with events
- * and, if necessary, also synchronize (via events) between iterations.
- *
- * If the algo does not implement this interface, GPU timings are disabled.
- */
-class AnnGPU {
- public:
- /**
- * Return the main cuda stream for this algorithm.
- * If any work is done in multiple streams, they should synchornize with the main stream at the
- * end.
- */
- [[nodiscard]] virtual auto get_sync_stream() const noexcept -> cudaStream_t = 0;
- /**
- * By default a GPU algorithm uses a fixed stream to order GPU operations.
- * However, an algorithm may need to synchronize with the host at the end of its execution.
- * In that case, also synchronizing with a benchmark event would put it at disadvantage.
- *
- * We can disable event sync by passing `false` here
- * - ONLY IF THE ALGORITHM HAS PRODUCED ITS OUTPUT BY THE TIME IT SYNCHRONIZES WITH CPU.
- */
- [[nodiscard]] virtual auto uses_stream() const noexcept -> bool { return true; }
- virtual ~AnnGPU() noexcept = default;
-};
-
-template
-class ANN : public AnnBase {
- public:
- struct AnnSearchParam {
- Objective metric_objective = Objective::LATENCY;
- virtual ~AnnSearchParam() = default;
- [[nodiscard]] virtual auto needs_dataset() const -> bool { return false; };
- };
-
- inline ANN(Metric metric, int dim) : AnnBase(metric, dim) {}
- virtual ~ANN() noexcept override = default;
-
- virtual void build(const T* dataset, size_t nrow) = 0;
-
- virtual void set_search_param(const AnnSearchParam& param) = 0;
- // TODO: this assumes that an algorithm can always return k results.
- // This is not always possible.
- virtual void search(const T* queries,
- int batch_size,
- int k,
- AnnBase::index_type* neighbors,
- float* distances) const = 0;
-
- virtual void save(const std::string& file) const = 0;
- virtual void load(const std::string& file) = 0;
-
- virtual AlgoProperty get_preference() const = 0;
-
- // Some algorithms don't save the building dataset in their indices.
- // So they should be given the access to that dataset during searching.
- // The advantage of this way is that index has smaller size
- // and many indices can share one dataset.
- //
- // SearchParam::needs_dataset() of such algorithm should be true,
- // and set_search_dataset() should save the passed-in pointer somewhere.
- // The client code should call set_search_dataset() before searching,
- // and should not release dataset before searching is finished.
- virtual void set_search_dataset(const T* /*dataset*/, size_t /*nrow*/){};
-
- /**
- * Make a shallow copy of the ANN wrapper that shares the resources and ensures thread-safe access
- * to them. */
- virtual auto copy() -> std::unique_ptr> = 0;
-};
-
-} // namespace raft::bench::ann
-
-#define REGISTER_ALGO_INSTANCE(DataT) \
- template auto raft::bench::ann::create_algo( \
- const std::string&, const std::string&, int, const nlohmann::json&, const std::vector&) \
- ->std::unique_ptr>; \
- template auto raft::bench::ann::create_search_param(const std::string&, \
- const nlohmann::json&) \
- ->std::unique_ptr::AnnSearchParam>;
diff --git a/cpp/bench/ann/src/common/benchmark.cpp b/cpp/bench/ann/src/common/benchmark.cpp
deleted file mode 100644
index 5510abf42f..0000000000
--- a/cpp/bench/ann/src/common/benchmark.cpp
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Copyright (c) 2023-2024, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// clang-format off
-#include "cuda_stub.hpp" // must go first
-// clang-format on
-
-#include "ann_types.hpp"
-
-#include
-#define JSON_DIAGNOSTICS 1
-#include
-
-#include
-#include
-#include
-
-namespace raft::bench::ann {
-
-struct lib_handle {
- void* handle{nullptr};
- explicit lib_handle(const std::string& name)
- {
- handle = dlopen(name.c_str(), RTLD_LAZY | RTLD_LOCAL);
- if (handle == nullptr) {
- auto error_msg = "Failed to load " + name;
- auto err = dlerror();
- if (err != nullptr && err[0] != '\0') { error_msg += ": " + std::string(err); }
- throw std::runtime_error(error_msg);
- }
- }
- ~lib_handle() noexcept
- {
- if (handle != nullptr) { dlclose(handle); }
- }
-};
-
-auto load_lib(const std::string& algo) -> void*
-{
- static std::unordered_map libs{};
- auto found = libs.find(algo);
-
- if (found != libs.end()) { return found->second.handle; }
- auto lib_name = "lib" + algo + "_ann_bench.so";
- return libs.emplace(algo, lib_name).first->second.handle;
-}
-
-auto get_fun_name(void* addr) -> std::string
-{
- Dl_info dl_info;
- if (dladdr(addr, &dl_info) != 0) {
- if (dl_info.dli_sname != nullptr && dl_info.dli_sname[0] != '\0') {
- return std::string{dl_info.dli_sname};
- }
- }
- throw std::logic_error("Failed to find out name of the looked up function");
-}
-
-template
-auto create_algo(const std::string& algo,
- const std::string& distance,
- int dim,
- const nlohmann::json& conf,
- const std::vector& dev_list) -> std::unique_ptr>
-{
- static auto fname = get_fun_name(reinterpret_cast(&create_algo));
- auto handle = load_lib(algo);
- auto fun_addr = dlsym(handle, fname.c_str());
- if (fun_addr == nullptr) {
- throw std::runtime_error("Couldn't load the create_algo function (" + algo + ")");
- }
- auto fun = reinterpret_cast)>(fun_addr);
- return fun(algo, distance, dim, conf, dev_list);
-}
-
-template
-std::unique_ptr::AnnSearchParam> create_search_param(
- const std::string& algo, const nlohmann::json& conf)
-{
- static auto fname = get_fun_name(reinterpret_cast(&create_search_param));
- auto handle = load_lib(algo);
- auto fun_addr = dlsym(handle, fname.c_str());
- if (fun_addr == nullptr) {
- throw std::runtime_error("Couldn't load the create_search_param function (" + algo + ")");
- }
- auto fun = reinterpret_cast)>(fun_addr);
- return fun(algo, conf);
-}
-
-}; // namespace raft::bench::ann
-
-REGISTER_ALGO_INSTANCE(float);
-REGISTER_ALGO_INSTANCE(std::int8_t);
-REGISTER_ALGO_INSTANCE(std::uint8_t);
-
-#include "benchmark.hpp"
-
-int main(int argc, char** argv) { return raft::bench::ann::run_main(argc, argv); }
diff --git a/cpp/bench/ann/src/common/benchmark.hpp b/cpp/bench/ann/src/common/benchmark.hpp
deleted file mode 100644
index 185d54a0a3..0000000000
--- a/cpp/bench/ann/src/common/benchmark.hpp
+++ /dev/null
@@ -1,736 +0,0 @@
-/*
- * Copyright (c) 2023-2024, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#pragma once
-
-#include "ann_types.hpp"
-#include "conf.hpp"
-#include "dataset.hpp"
-#include "util.hpp"
-
-#include
-
-#include
-#include
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-namespace raft::bench::ann {
-
-static inline std::unique_ptr current_algo{nullptr};
-static inline std::unique_ptr current_algo_props{nullptr};
-
-using kv_series = std::vector>>;
-
-inline auto apply_overrides(const std::vector& configs,
- const kv_series& overrides,
- std::size_t override_idx = 0) -> std::vector
-{
- std::vector results{};
- if (override_idx >= overrides.size()) {
- auto n = configs.size();
- for (size_t i = 0; i < n; i++) {
- auto c = configs[i];
- c["override_suffix"] = n > 1 ? "/" + std::to_string(i) : "";
- results.push_back(c);
- }
- return results;
- }
- auto rec_configs = apply_overrides(configs, overrides, override_idx + 1);
- auto [key, vals] = overrides[override_idx];
- auto n = vals.size();
- for (size_t i = 0; i < n; i++) {
- const auto& val = vals[i];
- for (auto rc : rec_configs) {
- if (n > 1) {
- rc["override_suffix"] =
- static_cast(rc["override_suffix"]) + "/" + std::to_string(i);
- }
- rc[key] = val;
- results.push_back(rc);
- }
- }
- return results;
-}
-
-inline auto apply_overrides(const nlohmann::json& config,
- const kv_series& overrides,
- std::size_t override_idx = 0)
-{
- return apply_overrides(std::vector{config}, overrides, 0);
-}
-
-inline void dump_parameters(::benchmark::State& state, nlohmann::json params)
-{
- std::string label = "";
- bool label_empty = true;
- for (auto& [key, val] : params.items()) {
- if (val.is_number()) {
- state.counters.insert({{key, val}});
- } else if (val.is_boolean()) {
- state.counters.insert({{key, val ? 1.0 : 0.0}});
- } else {
- auto kv = key + "=" + val.dump();
- if (label_empty) {
- label = kv;
- } else {
- label += "#" + kv;
- }
- label_empty = false;
- }
- }
- if (!label_empty) { state.SetLabel(label); }
-}
-
-inline auto parse_algo_property(AlgoProperty prop, const nlohmann::json& conf) -> AlgoProperty
-{
- if (conf.contains("dataset_memory_type")) {
- prop.dataset_memory_type = parse_memory_type(conf.at("dataset_memory_type"));
- }
- if (conf.contains("query_memory_type")) {
- prop.query_memory_type = parse_memory_type(conf.at("query_memory_type"));
- }
- return prop;
-};
-
-template
-void bench_build(::benchmark::State& state,
- std::shared_ptr> dataset,
- Configuration::Index index,
- bool force_overwrite)
-{
- // NB: these two thread-local vars can be used within algo wrappers
- raft::bench::ann::benchmark_thread_id = state.thread_index();
- raft::bench::ann::benchmark_n_threads = state.threads();
- dump_parameters(state, index.build_param);
- if (file_exists(index.file)) {
- if (force_overwrite) {
- log_info("Overwriting file: %s", index.file.c_str());
- } else {
- return state.SkipWithMessage(
- "Index file already exists (use --force to overwrite the index).");
- }
- }
-
- std::unique_ptr> algo;
- try {
- algo = ann::create_algo(
- index.algo, dataset->distance(), dataset->dim(), index.build_param, index.dev_list);
- } catch (const std::exception& e) {
- return state.SkipWithError("Failed to create an algo: " + std::string(e.what()));
- }
-
- const auto algo_property = parse_algo_property(algo->get_preference(), index.build_param);
-
- const T* base_set = dataset->base_set(algo_property.dataset_memory_type);
- std::size_t index_size = dataset->base_set_size();
-
- cuda_timer gpu_timer{algo};
- {
- nvtx_case nvtx{state.name()};
- for (auto _ : state) {
- [[maybe_unused]] auto ntx_lap = nvtx.lap();
- [[maybe_unused]] auto gpu_lap = gpu_timer.lap();
- try {
- algo->build(base_set, index_size);
- } catch (const std::exception& e) {
- state.SkipWithError(std::string(e.what()));
- }
- }
- }
- if (gpu_timer.active()) {
- state.counters.insert({"GPU", {gpu_timer.total_time(), benchmark::Counter::kAvgIterations}});
- }
- state.counters.insert({{"index_size", index_size}});
-
- if (state.skipped()) { return; }
- make_sure_parent_dir_exists(index.file);
- algo->save(index.file);
-}
-
-template
-void bench_search(::benchmark::State& state,
- Configuration::Index index,
- std::size_t search_param_ix,
- std::shared_ptr> dataset,
- Objective metric_objective)
-{
- // NB: these two thread-local vars can be used within algo wrappers
- raft::bench::ann::benchmark_thread_id = state.thread_index();
- raft::bench::ann::benchmark_n_threads = state.threads();
- std::size_t queries_processed = 0;
-
- const auto& sp_json = index.search_params[search_param_ix];
-
- if (state.thread_index() == 0) { dump_parameters(state, sp_json); }
-
- // NB: `k` and `n_queries` are guaranteed to be populated in conf.cpp
- const std::uint32_t k = sp_json["k"];
- // Amount of data processes in one go
- const std::size_t n_queries = sp_json["n_queries"];
- // Round down the query data to a multiple of the batch size to loop over full batches of data
- const std::size_t query_set_size = (dataset->query_set_size() / n_queries) * n_queries;
-
- if (dataset->query_set_size() < n_queries) {
- std::stringstream msg;
- msg << "Not enough queries in benchmark set. Expected " << n_queries << ", actual "
- << dataset->query_set_size();
- state.SkipWithError(msg.str());
- return;
- }
-
- // Each thread start from a different offset, so that the queries that they process do not
- // overlap.
- std::ptrdiff_t batch_offset = (state.thread_index() * n_queries) % query_set_size;
- std::ptrdiff_t queries_stride = state.threads() * n_queries;
- // Output is saved into a contiguous buffer (separate buffers for each thread).
- std::ptrdiff_t out_offset = 0;
-
- const T* query_set = nullptr;
-
- if (!file_exists(index.file)) {
- state.SkipWithError("Index file is missing. Run the benchmark in the build mode first.");
- return;
- }
-
- /**
- * Make sure the first thread loads the algo and dataset
- */
- progress_barrier load_barrier{};
- if (load_barrier.arrive(1) == 0) {
- // algo is static to cache it between close search runs to save time on index loading
- static std::string index_file = "";
- if (index.file != index_file) {
- current_algo.reset();
- index_file = index.file;
- }
-
- std::unique_ptr::AnnSearchParam> search_param;
- ANN* algo;
- try {
- if (!current_algo || (algo = dynamic_cast*>(current_algo.get())) == nullptr) {
- auto ualgo = ann::create_algo(
- index.algo, dataset->distance(), dataset->dim(), index.build_param, index.dev_list);
- algo = ualgo.get();
- algo->load(index_file);
- current_algo = std::move(ualgo);
- }
- search_param = ann::create_search_param(index.algo, sp_json);
- search_param->metric_objective = metric_objective;
- } catch (const std::exception& e) {
- state.SkipWithError("Failed to create an algo: " + std::string(e.what()));
- return;
- }
-
- current_algo_props = std::make_unique(
- std::move(parse_algo_property(algo->get_preference(), sp_json)));
-
- if (search_param->needs_dataset()) {
- try {
- algo->set_search_dataset(dataset->base_set(current_algo_props->dataset_memory_type),
- dataset->base_set_size());
- } catch (const std::exception& ex) {
- state.SkipWithError("The algorithm '" + index.name +
- "' requires the base set, but it's not available. " +
- "Exception: " + std::string(ex.what()));
- return;
- }
- }
- try {
- algo->set_search_param(*search_param);
- } catch (const std::exception& ex) {
- state.SkipWithError("An error occurred setting search parameters: " + std::string(ex.what()));
- return;
- }
-
- query_set = dataset->query_set(current_algo_props->query_memory_type);
- load_barrier.arrive(state.threads());
- } else {
- // All other threads will wait for the first thread to initialize the algo.
- load_barrier.wait(state.threads() * 2);
- // gbench ensures that all threads are synchronized at the start of the benchmark loop.
- // We are accessing shared variables (like current_algo, current_algo_probs) before the
- // benchmark loop, therefore the synchronization here is necessary.
- }
- query_set = dataset->query_set(current_algo_props->query_memory_type);
-
- /**
- * Each thread will manage its own outputs
- */
- using index_type = AnnBase::index_type;
- constexpr size_t kAlignResultBuf = 64;
- size_t result_elem_count = k * query_set_size;
- result_elem_count =
- ((result_elem_count + kAlignResultBuf - 1) / kAlignResultBuf) * kAlignResultBuf;
- auto& result_buf =
- get_result_buffer_from_global_pool(result_elem_count * (sizeof(float) + sizeof(index_type)));
- auto* neighbors_ptr =
- reinterpret_cast(result_buf.data(current_algo_props->query_memory_type));
- auto* distances_ptr = reinterpret_cast(neighbors_ptr + result_elem_count);
-
- {
- nvtx_case nvtx{state.name()};
-
- std::unique_ptr> algo{nullptr};
- try {
- dynamic_cast*>(current_algo.get())->copy().swap(algo);
- } catch (const std::exception& e) {
- state.SkipWithError("Algo::copy: " + std::string(e.what()));
- return;
- }
- // Initialize with algo, so that the timer.lap() object can sync with algo::get_sync_stream()
- cuda_timer gpu_timer{algo};
- auto start = std::chrono::high_resolution_clock::now();
- for (auto _ : state) {
- [[maybe_unused]] auto ntx_lap = nvtx.lap();
- [[maybe_unused]] auto gpu_lap = gpu_timer.lap();
- try {
- algo->search(query_set + batch_offset * dataset->dim(),
- n_queries,
- k,
- neighbors_ptr + out_offset * k,
- distances_ptr + out_offset * k);
- } catch (const std::exception& e) {
- state.SkipWithError("Benchmark loop: " + std::string(e.what()));
- break;
- }
-
- // advance to the next batch
- batch_offset = (batch_offset + queries_stride) % query_set_size;
- out_offset = (out_offset + n_queries) % query_set_size;
-
- queries_processed += n_queries;
- }
- auto end = std::chrono::high_resolution_clock::now();
- auto duration = std::chrono::duration_cast>(end - start).count();
- if (state.thread_index() == 0) { state.counters.insert({{"end_to_end", duration}}); }
- state.counters.insert({"Latency", {duration, benchmark::Counter::kAvgIterations}});
-
- if (gpu_timer.active()) {
- state.counters.insert({"GPU", {gpu_timer.total_time(), benchmark::Counter::kAvgIterations}});
- }
- }
-
- state.SetItemsProcessed(queries_processed);
-
- // This will be the total number of queries across all threads
- state.counters.insert({{"total_queries", queries_processed}});
-
- if (state.skipped()) { return; }
-
- // Each thread calculates recall on their partition of queries.
- // evaluate recall
- if (dataset->max_k() >= k) {
- const std::int32_t* gt = dataset->gt_set();
- const std::uint32_t max_k = dataset->max_k();
- result_buf.transfer_data(MemoryType::Host, current_algo_props->query_memory_type);
- auto* neighbors_host = reinterpret_cast(result_buf.data(MemoryType::Host));
- std::size_t rows = std::min(queries_processed, query_set_size);
- std::size_t match_count = 0;
- std::size_t total_count = rows * static_cast(k);
-
- // We go through the groundtruth with same stride as the benchmark loop.
- size_t out_offset = 0;
- size_t batch_offset = (state.thread_index() * n_queries) % query_set_size;
- while (out_offset < rows) {
- for (std::size_t i = 0; i < n_queries; i++) {
- size_t i_orig_idx = batch_offset + i;
- size_t i_out_idx = out_offset + i;
- if (i_out_idx < rows) {
- for (std::uint32_t j = 0; j < k; j++) {
- auto act_idx = std::int32_t(neighbors_host[i_out_idx * k + j]);
- for (std::uint32_t l = 0; l < k; l++) {
- auto exp_idx = gt[i_orig_idx * max_k + l];
- if (act_idx == exp_idx) {
- match_count++;
- break;
- }
- }
- }
- }
- }
- out_offset += n_queries;
- batch_offset = (batch_offset + queries_stride) % query_set_size;
- }
- double actual_recall = static_cast(match_count) / static_cast(total_count);
- state.counters.insert({"Recall", {actual_recall, benchmark::Counter::kAvgThreads}});
- }
-}
-
-inline void printf_usage()
-{
- ::benchmark::PrintDefaultHelp();
- fprintf(stdout,
- " [--build|--search] \n"
- " [--force]\n"
- " [--data_prefix=]\n"
- " [--index_prefix=]\n"
- " [--override_kv=]\n"
- " [--mode=\n"
- " [--threads=min[:max]]\n"
- " .json\n"
- "\n"
- "Note the non-standard benchmark parameters:\n"
- " --build: build mode, will build index\n"
- " --search: search mode, will search using the built index\n"
- " one and only one of --build and --search should be specified\n"
- " --force: force overwriting existing index files\n"
- " --data_prefix=:"
- " prepend to dataset file paths specified in the .json (default = "
- "'data/').\n"
- " --index_prefix=:"
- " prepend to index file paths specified in the .json (default = "
- "'index/').\n"
- " --override_kv=:"
- " override a build/search key one or more times multiplying the number of configurations;"
- " you can use this parameter multiple times to get the Cartesian product of benchmark"
- " configs.\n"
- " --mode="
- " run the benchmarks in latency (accumulate times spent in each batch) or "
- " throughput (pipeline batches and measure end-to-end) mode\n"
- " --threads=min[:max] specify the number threads to use for throughput benchmark."
- " Power of 2 values between 'min' and 'max' will be used. If only 'min' is specified,"
- " then a single test is run with 'min' threads. By default min=1, max=.\n");
-}
-
-template
-void register_build(std::shared_ptr> dataset,
- std::vector indices,
- bool force_overwrite)
-{
- for (auto index : indices) {
- auto suf = static_cast(index.build_param["override_suffix"]);
- auto file_suf = suf;
- index.build_param.erase("override_suffix");
- std::replace(file_suf.begin(), file_suf.end(), '/', '-');
- index.file += file_suf;
- auto* b = ::benchmark::RegisterBenchmark(
- index.name + suf, bench_build, dataset, index, force_overwrite);
- b->Unit(benchmark::kSecond);
- b->MeasureProcessCPUTime();
- b->UseRealTime();
- }
-}
-
-template
-void register_search(std::shared_ptr> dataset,
- std::vector indices,
- Objective metric_objective,
- const std::vector& threads)
-{
- for (auto index : indices) {
- for (std::size_t i = 0; i < index.search_params.size(); i++) {
- auto suf = static_cast(index.search_params[i]["override_suffix"]);
- index.search_params[i].erase("override_suffix");
-
- auto* b = ::benchmark::RegisterBenchmark(
- index.name + suf, bench_search, index, i, dataset, metric_objective)
- ->Unit(benchmark::kMillisecond)
- /**
- * The following are important for getting accuracy QPS measurements on both CPU
- * and GPU These make sure that
- * - `end_to_end` ~ (`Time` * `Iterations`)
- * - `items_per_second` ~ (`total_queries` / `end_to_end`)
- * - Throughput = `items_per_second`
- */
- ->MeasureProcessCPUTime()
- ->UseRealTime();
- if (metric_objective == Objective::THROUGHPUT) {
- if (index.algo.find("faiss_gpu") != std::string::npos) {
- log_warn(
- "FAISS GPU does not work in throughput mode because the underlying "
- "StandardGpuResources object is not thread-safe. This will cause unexpected results");
- }
- b->ThreadRange(threads[0], threads[1]);
- }
- }
- }
-}
-
-template
-void dispatch_benchmark(const Configuration& conf,
- bool force_overwrite,
- bool build_mode,
- bool search_mode,
- std::string data_prefix,
- std::string index_prefix,
- kv_series override_kv,
- Objective metric_objective,
- const std::vector& threads)
-{
- if (cudart.found()) {
- for (auto [key, value] : cuda_info()) {
- ::benchmark::AddCustomContext(key, value);
- }
- }
- const auto dataset_conf = conf.get_dataset_conf();
- auto base_file = combine_path(data_prefix, dataset_conf.base_file);
- auto query_file = combine_path(data_prefix, dataset_conf.query_file);
- auto gt_file = dataset_conf.groundtruth_neighbors_file;
- if (gt_file.has_value()) { gt_file.emplace(combine_path(data_prefix, gt_file.value())); }
- auto dataset = std::make_shared>(dataset_conf.name,
- base_file,
- dataset_conf.subset_first_row,
- dataset_conf.subset_size,
- query_file,
- dataset_conf.distance,
- gt_file);
- ::benchmark::AddCustomContext("dataset", dataset_conf.name);
- ::benchmark::AddCustomContext("distance", dataset_conf.distance);
- std::vector indices = conf.get_indices();
- if (build_mode) {
- if (file_exists(base_file)) {
- log_info("Using the dataset file '%s'", base_file.c_str());
- ::benchmark::AddCustomContext("n_records", std::to_string(dataset->base_set_size()));
- ::benchmark::AddCustomContext("dim", std::to_string(dataset->dim()));
- } else {
- log_warn("Dataset file '%s' does not exist; benchmarking index building is impossible.",
- base_file.c_str());
- }
- std::vector more_indices{};
- for (auto& index : indices) {
- for (auto param : apply_overrides(index.build_param, override_kv)) {
- auto modified_index = index;
- modified_index.build_param = param;
- modified_index.file = combine_path(index_prefix, modified_index.file);
- more_indices.push_back(modified_index);
- }
- }
- register_build(dataset, more_indices, force_overwrite);
- } else if (search_mode) {
- if (file_exists(query_file)) {
- log_info("Using the query file '%s'", query_file.c_str());
- ::benchmark::AddCustomContext("max_n_queries", std::to_string(dataset->query_set_size()));
- ::benchmark::AddCustomContext("dim", std::to_string(dataset->dim()));
- if (gt_file.has_value()) {
- if (file_exists(*gt_file)) {
- log_info("Using the ground truth file '%s'", gt_file->c_str());
- ::benchmark::AddCustomContext("max_k", std::to_string(dataset->max_k()));
- } else {
- log_warn("Ground truth file '%s' does not exist; the recall won't be reported.",
- gt_file->c_str());
- }
- } else {
- log_warn(
- "Ground truth file is not provided; the recall won't be reported. NB: use "
- "the 'groundtruth_neighbors_file' alongside the 'query_file' key to specify the "
- "path to "
- "the ground truth in your conf.json.");
- }
- } else {
- log_warn("Query file '%s' does not exist; benchmarking search is impossible.",
- query_file.c_str());
- }
- for (auto& index : indices) {
- index.search_params = apply_overrides(index.search_params, override_kv);
- index.file = combine_path(index_prefix, index.file);
- }
- register_search(dataset, indices, metric_objective, threads);
- }
-}
-
-inline auto parse_bool_flag(const char* arg, const char* pat, bool& result) -> bool
-{
- if (strcmp(arg, pat) == 0) {
- result = true;
- return true;
- }
- return false;
-}
-
-inline auto parse_string_flag(const char* arg, const char* pat, std::string& result) -> bool
-{
- auto n = strlen(pat);
- if (strncmp(pat, arg, strlen(pat)) == 0) {
- result = arg + n + 1;
- return true;
- }
- return false;
-}
-
-inline auto run_main(int argc, char** argv) -> int
-{
- bool force_overwrite = false;
- bool build_mode = false;
- bool search_mode = false;
- std::string data_prefix = "data";
- std::string index_prefix = "index";
- std::string new_override_kv = "";
- std::string mode = "latency";
- std::string threads_arg_txt = "";
- std::vector threads = {1, -1}; // min_thread, max_thread
- std::string log_level_str = "";
- int raft_log_level = raft::logger::get(RAFT_NAME).get_level();
- kv_series override_kv{};
-
- char arg0_default[] = "benchmark"; // NOLINT
- char* args_default = arg0_default;
- if (!argv) {
- argc = 1;
- argv = &args_default;
- }
- if (argc == 1) {
- printf_usage();
- return -1;
- }
-
- char* conf_path = argv[--argc];
- std::ifstream conf_stream(conf_path);
-
- for (int i = 1; i < argc; i++) {
- if (parse_bool_flag(argv[i], "--force", force_overwrite) ||
- parse_bool_flag(argv[i], "--build", build_mode) ||
- parse_bool_flag(argv[i], "--search", search_mode) ||
- parse_string_flag(argv[i], "--data_prefix", data_prefix) ||
- parse_string_flag(argv[i], "--index_prefix", index_prefix) ||
- parse_string_flag(argv[i], "--mode", mode) ||
- parse_string_flag(argv[i], "--override_kv", new_override_kv) ||
- parse_string_flag(argv[i], "--threads", threads_arg_txt) ||
- parse_string_flag(argv[i], "--raft_log_level", log_level_str)) {
- if (!log_level_str.empty()) {
- raft_log_level = std::stoi(log_level_str);
- log_level_str = "";
- }
- if (!threads_arg_txt.empty()) {
- auto threads_arg = split(threads_arg_txt, ':');
- threads[0] = std::stoi(threads_arg[0]);
- if (threads_arg.size() > 1) {
- threads[1] = std::stoi(threads_arg[1]);
- } else {
- threads[1] = threads[0];
- }
- threads_arg_txt = "";
- }
- if (!new_override_kv.empty()) {
- auto kvv = split(new_override_kv, ':');
- auto key = kvv[0];
- std::vector vals{};
- for (std::size_t j = 1; j < kvv.size(); j++) {
- vals.push_back(nlohmann::json::parse(kvv[j]));
- }
- override_kv.emplace_back(key, vals);
- new_override_kv = "";
- }
- for (int j = i; j < argc - 1; j++) {
- argv[j] = argv[j + 1];
- }
- argc--;
- i--;
- }
- }
-
- raft::logger::get(RAFT_NAME).set_level(raft_log_level);
-
- Objective metric_objective = Objective::LATENCY;
- if (mode == "throughput") { metric_objective = Objective::THROUGHPUT; }
-
- int max_threads =
- (metric_objective == Objective::THROUGHPUT) ? std::thread::hardware_concurrency() : 1;
- if (threads[1] == -1) threads[1] = max_threads;
-
- if (metric_objective == Objective::LATENCY) {
- if (threads[0] != 1 || threads[1] != 1) {
- log_warn("Latency mode enabled. Overriding threads arg, running with single thread.");
- threads = {1, 1};
- }
- }
-
- if (build_mode == search_mode) {
- log_error("One and only one of --build and --search should be specified");
- printf_usage();
- return -1;
- }
-
- if (!conf_stream) {
- log_error("Can't open configuration file: %s", conf_path);
- return -1;
- }
-
- if (cudart.needed() && !cudart.found()) {
- log_warn("cudart library is not found, GPU-based indices won't work.");
- }
-
- Configuration conf(conf_stream);
- std::string dtype = conf.get_dataset_conf().dtype;
-
- if (dtype == "float") {
- dispatch_benchmark(conf,
- force_overwrite,
- build_mode,
- search_mode,
- data_prefix,
- index_prefix,
- override_kv,
- metric_objective,
- threads);
- } else if (dtype == "half") {
- dispatch_benchmark(conf,
- force_overwrite,
- build_mode,
- search_mode,
- data_prefix,
- index_prefix,
- override_kv,
- metric_objective,
- threads);
- } else if (dtype == "uint8") {
- dispatch_benchmark(conf,
- force_overwrite,
- build_mode,
- search_mode,
- data_prefix,
- index_prefix,
- override_kv,
- metric_objective,
- threads);
- } else if (dtype == "int8") {
- dispatch_benchmark(conf,
- force_overwrite,
- build_mode,
- search_mode,
- data_prefix,
- index_prefix,
- override_kv,
- metric_objective,
- threads);
- } else {
- log_error("datatype '%s' is not supported", dtype.c_str());
- return -1;
- }
-
- ::benchmark::Initialize(&argc, argv, printf_usage);
- if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return -1;
- ::benchmark::RunSpecifiedBenchmarks();
- ::benchmark::Shutdown();
- // Release a possibly cached ANN object, so that it cannot be alive longer than the handle
- // to a shared library it depends on (dynamic benchmark executable).
- current_algo.reset();
- current_algo_props.reset();
- reset_global_device_resources();
- return 0;
-}
-}; // namespace raft::bench::ann
diff --git a/cpp/bench/ann/src/common/conf.hpp b/cpp/bench/ann/src/common/conf.hpp
deleted file mode 100644
index 92ba86c6cf..0000000000
--- a/cpp/bench/ann/src/common/conf.hpp
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * Copyright (c) 2023-2024, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#pragma once
-
-#include "util.hpp"
-
-#include
-#include
-#include
-#include
-#include
-
-#define JSON_DIAGNOSTICS 1
-#include
-
-namespace raft::bench::ann {
-
-class Configuration {
- public:
- struct Index {
- std::string name;
- std::string algo;
- nlohmann::json build_param;
- std::string file;
- std::vector dev_list;
-
- int batch_size;
- int k;
- std::vector search_params;
- };
-
- struct DatasetConf {
- std::string name;
- std::string base_file;
- // use only a subset of base_file,
- // the range of rows is [subset_first_row, subset_first_row + subset_size)
- // however, subset_size = 0 means using all rows after subset_first_row
- // that is, the subset is [subset_first_row, #rows in base_file)
- size_t subset_first_row{0};
- size_t subset_size{0};
- std::string query_file;
- std::string distance;
- std::optional groundtruth_neighbors_file{std::nullopt};
-
- // data type of input dataset, possible values ["float", "int8", "uint8"]
- std::string dtype;
- };
-
- explicit inline Configuration(std::istream& conf_stream)
- {
- // to enable comments in json
- auto conf = nlohmann::json::parse(conf_stream, nullptr, true, true);
-
- parse_dataset_(conf.at("dataset"));
- parse_index_(conf.at("index"), conf.at("search_basic_param"));
- }
-
- [[nodiscard]] inline auto get_dataset_conf() const -> DatasetConf { return dataset_conf_; }
- [[nodiscard]] inline auto get_indices() const -> std::vector { return indices_; };
-
- private:
- inline void parse_dataset_(const nlohmann::json& conf)
- {
- dataset_conf_.name = conf.at("name");
- dataset_conf_.base_file = conf.at("base_file");
- dataset_conf_.query_file = conf.at("query_file");
- dataset_conf_.distance = conf.at("distance");
-
- if (conf.contains("groundtruth_neighbors_file")) {
- dataset_conf_.groundtruth_neighbors_file = conf.at("groundtruth_neighbors_file");
- }
- if (conf.contains("subset_first_row")) {
- dataset_conf_.subset_first_row = conf.at("subset_first_row");
- }
- if (conf.contains("subset_size")) { dataset_conf_.subset_size = conf.at("subset_size"); }
-
- if (conf.contains("dtype")) {
- dataset_conf_.dtype = conf.at("dtype");
- } else {
- auto filename = dataset_conf_.base_file;
- if (filename.size() > 6 && filename.compare(filename.size() - 6, 6, "f16bin") == 0) {
- dataset_conf_.dtype = "half";
- } else if (filename.size() > 9 &&
- filename.compare(filename.size() - 9, 9, "fp16.fbin") == 0) {
- dataset_conf_.dtype = "half";
- } else if (filename.size() > 4 && filename.compare(filename.size() - 4, 4, "fbin") == 0) {
- dataset_conf_.dtype = "float";
- } else if (filename.size() > 5 && filename.compare(filename.size() - 5, 5, "u8bin") == 0) {
- dataset_conf_.dtype = "uint8";
- } else if (filename.size() > 5 && filename.compare(filename.size() - 5, 5, "i8bin") == 0) {
- dataset_conf_.dtype = "int8";
- } else {
- log_error("Could not determine data type of the dataset %s", filename.c_str());
- }
- }
- }
- inline void parse_index_(const nlohmann::json& index_conf,
- const nlohmann::json& search_basic_conf)
- {
- const int batch_size = search_basic_conf.at("batch_size");
- const int k = search_basic_conf.at("k");
-
- for (const auto& conf : index_conf) {
- Index index;
- index.name = conf.at("name");
- index.algo = conf.at("algo");
- index.build_param = conf.at("build_param");
- index.file = conf.at("file");
- index.batch_size = batch_size;
- index.k = k;
-
- if (conf.contains("multigpu")) {
- for (auto it : conf.at("multigpu")) {
- index.dev_list.push_back(it);
- }
- if (index.dev_list.empty()) { throw std::runtime_error("dev_list shouln't be empty!"); }
- index.dev_list.shrink_to_fit();
- index.build_param["multigpu"] = conf["multigpu"];
- }
-
- for (auto param : conf.at("search_params")) {
- /* ### Special parameters for backward compatibility ###
-
- - Local values of `k` and `n_queries` take priority.
- - The legacy "batch_size" renamed to `n_queries`.
- - Basic search params are used otherwise.
- */
- if (!param.contains("k")) { param["k"] = k; }
- if (!param.contains("n_queries")) {
- if (param.contains("batch_size")) {
- param["n_queries"] = param["batch_size"];
- param.erase("batch_size");
- } else {
- param["n_queries"] = batch_size;
- }
- }
- index.search_params.push_back(param);
- }
-
- indices_.push_back(index);
- }
- }
-
- DatasetConf dataset_conf_;
- std::vector indices_;
-};
-
-} // namespace raft::bench::ann
diff --git a/cpp/bench/ann/src/common/cuda_huge_page_resource.hpp b/cpp/bench/ann/src/common/cuda_huge_page_resource.hpp
deleted file mode 100644
index 27be26dfe9..0000000000
--- a/cpp/bench/ann/src/common/cuda_huge_page_resource.hpp
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Copyright (c) 2023-2024, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#pragma once
-
-#include
-
-#include
-#include
-
-#include
-
-#include
-#include
-
-namespace raft::mr {
-/**
- * @brief `device_memory_resource` derived class that uses mmap to allocate memory.
- * This class enables memory allocation using huge pages.
- * It is assumed that the allocated memory is directly accessible on device. This currently only
- * works on GH systems.
- *
- * TODO(tfeher): consider improving or removing this helper once we made progress with
- * https://github.com/rapidsai/raft/issues/1819
- */
-class cuda_huge_page_resource final : public rmm::mr::device_memory_resource {
- public:
- cuda_huge_page_resource() = default;
- ~cuda_huge_page_resource() override = default;
- cuda_huge_page_resource(cuda_huge_page_resource const&) = default;
- cuda_huge_page_resource(cuda_huge_page_resource&&) = default;
- cuda_huge_page_resource& operator=(cuda_huge_page_resource const&) = default;
- cuda_huge_page_resource& operator=(cuda_huge_page_resource&&) = default;
-
- private:
- /**
- * @brief Allocates memory of size at least `bytes` using cudaMalloc.
- *
- * The returned pointer has at least 256B alignment.
- *
- * @note Stream argument is ignored
- *
- * @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled
- *
- * @param bytes The size, in bytes, of the allocation
- * @return void* Pointer to the newly allocated memory
- */
- void* do_allocate(std::size_t bytes, rmm::cuda_stream_view) override
- {
- void* _addr{nullptr};
- _addr = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- if (_addr == MAP_FAILED) { RAFT_FAIL("huge_page_resource::MAP FAILED"); }
- if (madvise(_addr, bytes, MADV_HUGEPAGE) == -1) {
- munmap(_addr, bytes);
- RAFT_FAIL("huge_page_resource::madvise MADV_HUGEPAGE");
- }
- memset(_addr, 0, bytes);
- return _addr;
- }
-
- /**
- * @brief Deallocate memory pointed to by \p p.
- *
- * @note Stream argument is ignored.
- *
- * @throws Nothing.
- *
- * @param p Pointer to be deallocated
- */
- void do_deallocate(void* ptr, std::size_t size, rmm::cuda_stream_view) override
- {
- if (munmap(ptr, size) == -1) { RAFT_FAIL("huge_page_resource::munmap"); }
- }
-
- /**
- * @brief Compare this resource to another.
- *
- * Two cuda_huge_page_resources always compare equal, because they can each
- * deallocate memory allocated by the other.
- *
- * @throws Nothing.
- *
- * @param other The other resource to compare to
- * @return true If the two resources are equivalent
- * @return false If the two resources are not equal
- */
- [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override
- {
- return dynamic_cast(&other) != nullptr;
- }
-};
-} // namespace raft::mr
diff --git a/cpp/bench/ann/src/common/cuda_pinned_resource.hpp b/cpp/bench/ann/src/common/cuda_pinned_resource.hpp
deleted file mode 100644
index 3256fc293c..0000000000
--- a/cpp/bench/ann/src/common/cuda_pinned_resource.hpp
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright (c) 2023-2024, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#pragma once
-
-#include
-#include
-#include
-
-#include
-
-namespace raft::mr {
-/**
- * @brief `device_memory_resource` derived class that uses cudaMallocHost/Free for
- * allocation/deallocation.
- *
- * This is almost the same as rmm::mr::host::pinned_memory_resource, but it has
- * device_memory_resource as base class. Pinned memory can be accessed from device,
- * and using this allocator we can create device_mdarray backed by pinned allocator.
- *
- * TODO(tfeher): it would be preferred to just rely on the existing allocator from rmm
- * (pinned_memory_resource), but that is incompatible with the container_policy class
- * for device matrix, because the latter expects a device_memory_resource. We shall
- * revise this once we progress with Issue https://github.com/rapidsai/raft/issues/1819
- */
-class cuda_pinned_resource final : public rmm::mr::device_memory_resource {
- public:
- cuda_pinned_resource() = default;
- ~cuda_pinned_resource() override = default;
- cuda_pinned_resource(cuda_pinned_resource const&) = default;
- cuda_pinned_resource(cuda_pinned_resource&&) = default;
- cuda_pinned_resource& operator=(cuda_pinned_resource const&) = default;
- cuda_pinned_resource& operator=(cuda_pinned_resource&&) = default;
-
- private:
- /**
- * @brief Allocates memory of size at least `bytes` using cudaMalloc.
- *
- * The returned pointer has at least 256B alignment.
- *
- * @note Stream argument is ignored
- *
- * @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled
- *
- * @param bytes The size, in bytes, of the allocation
- * @return void* Pointer to the newly allocated memory
- */
- void* do_allocate(std::size_t bytes, rmm::cuda_stream_view) override
- {
- void* ptr{nullptr};
- RMM_CUDA_TRY_ALLOC(cudaMallocHost(&ptr, bytes));
- return ptr;
- }
-
- /**
- * @brief Deallocate memory pointed to by \p p.
- *
- * @note Stream argument is ignored.
- *
- * @throws Nothing.
- *
- * @param p Pointer to be deallocated
- */
- void do_deallocate(void* ptr, std::size_t, rmm::cuda_stream_view) override
- {
- RMM_ASSERT_CUDA_SUCCESS(cudaFreeHost(ptr));
- }
-
- /**
- * @brief Compare this resource to another.
- *
- * Two cuda_pinned_resources always compare equal, because they can each
- * deallocate memory allocated by the other.
- *
- * @throws Nothing.
- *
- * @param other The other resource to compare to
- * @return true If the two resources are equivalent
- * @return false If the two resources are not equal
- */
- [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override
- {
- return dynamic_cast(&other) != nullptr;
- }
-};
-} // namespace raft::mr
diff --git a/cpp/bench/ann/src/common/cuda_stub.hpp b/cpp/bench/ann/src/common/cuda_stub.hpp
deleted file mode 100644
index 5ed138a86d..0000000000
--- a/cpp/bench/ann/src/common/cuda_stub.hpp
+++ /dev/null
@@ -1,240 +0,0 @@
-/*
- * Copyright (c) 2023-2024, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
-The content of this header is governed by two preprocessor definitions:
-
- - BUILD_CPU_ONLY - whether none of the CUDA functions are used.
- - ANN_BENCH_LINK_CUDART - dynamically link against this string if defined.
-
-___________________________________________________________________________________
-|BUILD_CPU_ONLY | ANN_BENCH_LINK_CUDART | cudart | cuda_runtime_api.h |
-| | | found | needed | included |
-|---------------|-----------------------|-----------|---------|--------------------|
-| ON | | false | false | NO |
-| ON | "cudart.so.xx.xx" | false | false | NO |
-| OFF | | true | true | YES |
-| OFF | "cudart.so.xx.xx" | | true | YES |
-------------------------------------------------------------------------------------
-*/
-
-#pragma once
-
-#ifndef BUILD_CPU_ONLY
-#include
-#include
-#ifdef ANN_BENCH_LINK_CUDART
-#include
-
-#include
-#endif
-#else
-#include
-
-typedef void* cudaStream_t;
-typedef void* cudaEvent_t;
-typedef uint16_t half;
-#endif
-
-namespace raft::bench::ann {
-
-struct cuda_lib_handle {
- void* handle{nullptr};
- explicit cuda_lib_handle()
- {
-#ifdef ANN_BENCH_LINK_CUDART
- constexpr int kFlags = RTLD_NOW | RTLD_GLOBAL | RTLD_DEEPBIND | RTLD_NODELETE;
- // The full name of the linked cudart library 'cudart.so.MAJOR.MINOR.PATCH'
- char libname[] = ANN_BENCH_LINK_CUDART; // NOLINT
- handle = dlopen(ANN_BENCH_LINK_CUDART, kFlags);
- if (handle != nullptr) { return; }
- // try strip the PATCH
- auto p = strrchr(libname, '.');
- p[0] = 0;
- handle = dlopen(libname, kFlags);
- if (handle != nullptr) { return; }
- // try set the MINOR version to 0
- p = strrchr(libname, '.');
- p[1] = '0';
- p[2] = 0;
- handle = dlopen(libname, kFlags);
- if (handle != nullptr) { return; }
- // try strip the MINOR
- p[0] = 0;
- handle = dlopen(libname, kFlags);
- if (handle != nullptr) { return; }
- // try strip the MAJOR
- p = strrchr(libname, '.');
- p[0] = 0;
- handle = dlopen(libname, kFlags);
-#endif
- }
- ~cuda_lib_handle() noexcept
- {
-#ifdef ANN_BENCH_LINK_CUDART
- if (handle != nullptr) { dlclose(handle); }
-#endif
- }
-
- template
- auto sym(const char* name) -> Symbol
- {
-#ifdef ANN_BENCH_LINK_CUDART
- return reinterpret_cast(dlsym(handle, name));
-#else
- return nullptr;
-#endif
- }
-
- /** Whether this is NOT a cpu-only package. */
- [[nodiscard]] constexpr inline auto needed() const -> bool
- {
-#if defined(BUILD_CPU_ONLY)
- return false;
-#else
- return true;
-#endif
- }
-
- /** CUDA found, either at compile time or at runtime. */
- [[nodiscard]] inline auto found() const -> bool
- {
-#if defined(BUILD_CPU_ONLY)
- return false;
-#elif defined(ANN_BENCH_LINK_CUDART)
- return handle != nullptr;
-#else
- return true;
-#endif
- }
-};
-
-static inline cuda_lib_handle cudart{};
-
-#ifdef ANN_BENCH_LINK_CUDART
-namespace stub {
-
-[[gnu::weak, gnu::noinline]] cudaError_t cudaMemcpy(void* dst,
- const void* src,
- size_t count,
- enum cudaMemcpyKind kind)
-{
- return cudaSuccess;
-}
-
-[[gnu::weak, gnu::noinline]] cudaError_t cudaMalloc(void** ptr, size_t size)
-{
- *ptr = nullptr;
- return cudaSuccess;
-}
-[[gnu::weak, gnu::noinline]] cudaError_t cudaMemset(void* devPtr, int value, size_t count)
-{
- return cudaSuccess;
-}
-[[gnu::weak, gnu::noinline]] cudaError_t cudaFree(void* devPtr) { return cudaSuccess; }
-[[gnu::weak, gnu::noinline]] cudaError_t cudaStreamCreate(cudaStream_t* pStream)
-{
- *pStream = 0;
- return cudaSuccess;
-}
-[[gnu::weak, gnu::noinline]] cudaError_t cudaStreamCreateWithFlags(cudaStream_t* pStream,
- unsigned int flags)
-{
- *pStream = 0;
- return cudaSuccess;
-}
-[[gnu::weak, gnu::noinline]] cudaError_t cudaStreamDestroy(cudaStream_t pStream)
-{
- return cudaSuccess;
-}
-[[gnu::weak, gnu::noinline]] cudaError_t cudaDeviceSynchronize() { return cudaSuccess; }
-
-[[gnu::weak, gnu::noinline]] cudaError_t cudaStreamSynchronize(cudaStream_t pStream)
-{
- return cudaSuccess;
-}
-[[gnu::weak, gnu::noinline]] cudaError_t cudaEventCreate(cudaEvent_t* event)
-{
- *event = 0;
- return cudaSuccess;
-}
-[[gnu::weak, gnu::noinline]] cudaError_t cudaEventRecord(cudaEvent_t event, cudaStream_t stream)
-{
- return cudaSuccess;
-}
-[[gnu::weak, gnu::noinline]] cudaError_t cudaEventSynchronize(cudaEvent_t event)
-{
- return cudaSuccess;
-}
-[[gnu::weak, gnu::noinline]] cudaError_t cudaEventElapsedTime(float* ms,
- cudaEvent_t start,
- cudaEvent_t end)
-{
- *ms = 0;
- return cudaSuccess;
-}
-[[gnu::weak, gnu::noinline]] cudaError_t cudaEventDestroy(cudaEvent_t event) { return cudaSuccess; }
-[[gnu::weak, gnu::noinline]] cudaError_t cudaGetDevice(int* device)
-{
- *device = 0;
- return cudaSuccess;
-};
-[[gnu::weak, gnu::noinline]] cudaError_t cudaDriverGetVersion(int* driver)
-{
- *driver = 0;
- return cudaSuccess;
-};
-[[gnu::weak, gnu::noinline]] cudaError_t cudaRuntimeGetVersion(int* runtime)
-{
- *runtime = 0;
- return cudaSuccess;
-};
-[[gnu::weak, gnu::noinline]] cudaError_t cudaGetDeviceProperties(struct cudaDeviceProp* prop,
- int device)
-{
- *prop = cudaDeviceProp{};
- return cudaSuccess;
-}
-
-} // namespace stub
-
-#define RAFT_DECLARE_CUDART(fun) \
- static inline decltype(&stub::fun) fun = \
- cudart.found() ? cudart.sym(#fun) : &stub::fun
-
-RAFT_DECLARE_CUDART(cudaMemcpy);
-RAFT_DECLARE_CUDART(cudaMalloc);
-RAFT_DECLARE_CUDART(cudaMemset);
-RAFT_DECLARE_CUDART(cudaFree);
-RAFT_DECLARE_CUDART(cudaStreamCreate);
-RAFT_DECLARE_CUDART(cudaStreamCreateWithFlags);
-RAFT_DECLARE_CUDART(cudaStreamDestroy);
-RAFT_DECLARE_CUDART(cudaDeviceSynchronize);
-RAFT_DECLARE_CUDART(cudaStreamSynchronize);
-RAFT_DECLARE_CUDART(cudaEventCreate);
-RAFT_DECLARE_CUDART(cudaEventRecord);
-RAFT_DECLARE_CUDART(cudaEventSynchronize);
-RAFT_DECLARE_CUDART(cudaEventElapsedTime);
-RAFT_DECLARE_CUDART(cudaEventDestroy);
-RAFT_DECLARE_CUDART(cudaGetDevice);
-RAFT_DECLARE_CUDART(cudaDriverGetVersion);
-RAFT_DECLARE_CUDART(cudaRuntimeGetVersion);
-RAFT_DECLARE_CUDART(cudaGetDeviceProperties);
-
-#undef RAFT_DECLARE_CUDART
-#endif
-
-}; // namespace raft::bench::ann
diff --git a/cpp/bench/ann/src/common/dataset.hpp b/cpp/bench/ann/src/common/dataset.hpp
deleted file mode 100644
index 8fcff77d3c..0000000000
--- a/cpp/bench/ann/src/common/dataset.hpp
+++ /dev/null
@@ -1,495 +0,0 @@
-/*
- * Copyright (c) 2023-2024, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#pragma once
-
-#include "util.hpp"
-
-#include
-#include
-#include
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-namespace raft::bench::ann {
-
-// http://big-ann-benchmarks.com/index.html:
-// binary format that starts with 8 bytes of data consisting of num_points(uint32_t)
-// num_dimensions(uint32) followed by num_pts x num_dimensions x sizeof(type) bytes of
-// data stored one vector after another.
-// Data files will have suffixes .fbin, .u8bin, and .i8bin to represent float32, uint8
-// and int8 type data.
-// As extensions for this benchmark, half and int data files will have suffixes .f16bin
-// and .ibin, respectively.
-template
-class BinFile {
- public:
- BinFile(const std::string& file,
- const std::string& mode,
- uint32_t subset_first_row = 0,
- uint32_t subset_size = 0);
- ~BinFile()
- {
- if (mapped_ptr_ != nullptr) { unmap(); }
- if (fp_ != nullptr) { fclose(fp_); }
- }
- BinFile(const BinFile&) = delete;
- BinFile& operator=(const BinFile&) = delete;
-
- void get_shape(size_t* nrows, int* ndims) const
- {
- assert(read_mode_);
- if (!fp_) { open_file_(); }
- *nrows = nrows_;
- *ndims = ndims_;
- }
-
- void read(T* data) const
- {
- assert(read_mode_);
- if (!fp_) { open_file_(); }
- size_t total = static_cast(nrows_) * ndims_;
- if (fread(data, sizeof(T), total, fp_) != total) {
- throw std::runtime_error("fread() BinFile " + file_ + " failed");
- }
- }
-
- void write(const T* data, uint32_t nrows, uint32_t ndims)
- {
- assert(!read_mode_);
- if (!fp_) { open_file_(); }
- if (fwrite(&nrows, sizeof(uint32_t), 1, fp_) != 1) {
- throw std::runtime_error("fwrite() BinFile " + file_ + " failed");
- }
- if (fwrite(&ndims, sizeof(uint32_t), 1, fp_) != 1) {
- throw std::runtime_error("fwrite() BinFile " + file_ + " failed");
- }
-
- size_t total = static_cast(nrows) * ndims;
- if (fwrite(data, sizeof(T), total, fp_) != total) {
- throw std::runtime_error("fwrite() BinFile " + file_ + " failed");
- }
- }
-
- T* map() const
- {
- assert(read_mode_);
- if (!fp_) { open_file_(); }
- int fid = fileno(fp_);
- mapped_ptr_ = mmap(nullptr, file_size_, PROT_READ, MAP_PRIVATE, fid, 0);
- if (mapped_ptr_ == MAP_FAILED) {
- mapped_ptr_ = nullptr;
- throw std::runtime_error("mmap error: Value of errno " + std::to_string(errno) + ", " +
- std::string(strerror(errno)));
- }
- return reinterpret_cast(reinterpret_cast(mapped_ptr_) + 2 * sizeof(uint32_t) +
- subset_first_row_ * ndims_ * sizeof(T));
- }
-
- void unmap() const
- {
- if (munmap(mapped_ptr_, file_size_) == -1) {
- throw std::runtime_error("munmap error: " + std::string(strerror(errno)));
- }
- }
-
- private:
- void check_suffix_();
- void open_file_() const;
-
- std::string file_;
- bool read_mode_;
- uint32_t subset_first_row_;
- uint32_t subset_size_;
-
- mutable FILE* fp_{nullptr};
- mutable uint32_t nrows_;
- mutable uint32_t ndims_;
- mutable size_t file_size_;
- mutable void* mapped_ptr_{nullptr};
-};
-
-template
-BinFile::BinFile(const std::string& file,
- const std::string& mode,
- uint32_t subset_first_row,
- uint32_t subset_size)
- : file_(file),
- read_mode_(mode == "r"),
- subset_first_row_(subset_first_row),
- subset_size_(subset_size),
- fp_(nullptr)
-{
- check_suffix_();
-
- if (!read_mode_) {
- if (mode == "w") {
- if (subset_first_row != 0) {
- throw std::runtime_error("subset_first_row should be zero for write mode");
- }
- if (subset_size != 0) {
- throw std::runtime_error("subset_size should be zero for write mode");
- }
- } else {
- throw std::runtime_error("BinFile's mode must be either 'r' or 'w': " + file_);
- }
- }
-}
-
-template
-void BinFile::open_file_() const
-{
- fp_ = fopen(file_.c_str(), read_mode_ ? "r" : "w");
- if (!fp_) { throw std::runtime_error("open BinFile failed: " + file_); }
-
- if (read_mode_) {
- struct stat statbuf;
- if (stat(file_.c_str(), &statbuf) != 0) { throw std::runtime_error("stat() failed: " + file_); }
- file_size_ = statbuf.st_size;
-
- uint32_t header[2];
- if (fread(header, sizeof(uint32_t), 2, fp_) != 2) {
- throw std::runtime_error("read header of BinFile failed: " + file_);
- }
- nrows_ = header[0];
- ndims_ = header[1];
-
- size_t expected_file_size =
- 2 * sizeof(uint32_t) + static_cast(nrows_) * ndims_ * sizeof(T);
- if (file_size_ != expected_file_size) {
- throw std::runtime_error("expected file size of " + file_ + " is " +
- std::to_string(expected_file_size) + ", however, actual size is " +
- std::to_string(file_size_));
- }
-
- if (subset_first_row_ >= nrows_) {
- throw std::runtime_error(file_ + ": subset_first_row (" + std::to_string(subset_first_row_) +
- ") >= nrows (" + std::to_string(nrows_) + ")");
- }
- if (subset_first_row_ + subset_size_ > nrows_) {
- throw std::runtime_error(file_ + ": subset_first_row (" + std::to_string(subset_first_row_) +
- ") + subset_size (" + std::to_string(subset_size_) + ") > nrows (" +
- std::to_string(nrows_) + ")");
- }
-
- if (subset_first_row_) {
- static_assert(sizeof(long) == 8, "fseek() don't support 64-bit offset");
- if (fseek(fp_, sizeof(T) * subset_first_row_ * ndims_, SEEK_CUR) == -1) {
- throw std::runtime_error(file_ + ": fseek failed");
- }
- nrows_ -= subset_first_row_;
- }
- if (subset_size_) { nrows_ = subset_size_; }
- }
-}
-
-template
-void BinFile::check_suffix_()
-{
- auto pos = file_.rfind('.');
- if (pos == std::string::npos) {
- throw std::runtime_error("name of BinFile doesn't have a suffix: " + file_);
- }
- std::string suffix = file_.substr(pos + 1);
-
- if constexpr (std::is_same_v) {
- if (suffix != "fbin") {
- throw std::runtime_error("BinFile should has .fbin suffix: " + file_);
- }
- } else if constexpr (std::is_same_v) {
- if (suffix != "f16bin" && suffix != "fbin") {
- throw std::runtime_error("BinFile should has .f16bin suffix: " + file_);
- }
- } else if constexpr (std::is_same_v) {
- if (suffix != "ibin") {
- throw std::runtime_error("BinFile should has .ibin suffix: " + file_);
- }
- } else if constexpr (std::is_same_v) {
- if (suffix != "u8bin") {
- throw std::runtime_error("BinFile should has .u8bin suffix: " + file_);
- }
- } else if constexpr (std::is_same_v) {
- if (suffix != "i8bin") {
- throw std::runtime_error("BinFile should has .i8bin suffix: " + file_);
- }
- } else {
- throw std::runtime_error(
- "T of BinFile should be one of float, half, int, uint8_t, or int8_t");
- }
-}
-
-template
-class Dataset {
- public:
- Dataset(const std::string& name) : name_(name) {}
- Dataset(const std::string& name, const std::string& distance) : name_(name), distance_(distance)
- {
- }
- Dataset(const Dataset&) = delete;
- Dataset& operator=(const Dataset&) = delete;
- virtual ~Dataset();
-
- std::string name() const { return name_; }
- std::string distance() const { return distance_; }
- virtual int dim() const = 0;
- virtual uint32_t max_k() const = 0;
- virtual size_t base_set_size() const = 0;
- virtual size_t query_set_size() const = 0;
-
- // load data lazily, so don't pay the overhead of reading unneeded set
- // e.g. don't load base set when searching
- const T* base_set() const
- {
- if (!base_set_) { load_base_set_(); }
- return base_set_;
- }
-
- const T* query_set() const
- {
- if (!query_set_) { load_query_set_(); }
- return query_set_;
- }
-
- const int32_t* gt_set() const
- {
- if (!gt_set_) { load_gt_set_(); }
- return gt_set_;
- }
-
- const T* base_set_on_gpu() const;
- const T* query_set_on_gpu() const;
- const T* mapped_base_set() const;
-
- auto query_set(MemoryType memory_type) const -> const T*
- {
- switch (memory_type) {
- case MemoryType::Device: return query_set_on_gpu();
- default: return query_set();
- }
- }
-
- auto base_set(MemoryType memory_type) const -> const T*
- {
- switch (memory_type) {
- case MemoryType::Device: return base_set_on_gpu();
- case MemoryType::Host: return base_set();
- case MemoryType::HostMmap: return mapped_base_set();
- default: return nullptr;
- }
- }
-
- protected:
- virtual void load_base_set_() const = 0;
- virtual void load_gt_set_() const = 0;
- virtual void load_query_set_() const = 0;
- virtual void map_base_set_() const = 0;
-
- std::string name_;
- std::string distance_;
-
- mutable T* base_set_ = nullptr;
- mutable T* query_set_ = nullptr;
- mutable T* d_base_set_ = nullptr;
- mutable T* d_query_set_ = nullptr;
- mutable T* mapped_base_set_ = nullptr;
- mutable int32_t* gt_set_ = nullptr;
-};
-
-template
-Dataset::~Dataset()
-{
- delete[] base_set_;
- delete[] query_set_;
- delete[] gt_set_;
-#ifndef BUILD_CPU_ONLY
- if (d_base_set_) { cudaFree(d_base_set_); }
- if (d_query_set_) { cudaFree(d_query_set_); }
-#endif
-}
-
-template
-const T* Dataset::base_set_on_gpu() const
-{
-#ifndef BUILD_CPU_ONLY
- if (!d_base_set_) {
- base_set();
- cudaMalloc((void**)&d_base_set_, base_set_size() * dim() * sizeof(T));
- cudaMemcpy(d_base_set_, base_set_, base_set_size() * dim() * sizeof(T), cudaMemcpyHostToDevice);
- }
-#endif
- return d_base_set_;
-}
-
-template
-const T* Dataset::query_set_on_gpu() const
-{
-#ifndef BUILD_CPU_ONLY
- if (!d_query_set_) {
- query_set();
- cudaMalloc((void**)&d_query_set_, query_set_size() * dim() * sizeof(T));
- cudaMemcpy(
- d_query_set_, query_set_, query_set_size() * dim() * sizeof(T), cudaMemcpyHostToDevice);
- }
-#endif
- return d_query_set_;
-}
-
-template
-const T* Dataset::mapped_base_set() const
-{
- if (!mapped_base_set_) { map_base_set_(); }
- return mapped_base_set_;
-}
-
-template
-class BinDataset : public Dataset {
- public:
- BinDataset(const std::string& name,
- const std::string& base_file,
- size_t subset_first_row,
- size_t subset_size,
- const std::string& query_file,
- const std::string& distance,
- const std::optional& groundtruth_neighbors_file);
-
- int dim() const override;
- uint32_t max_k() const override;
- size_t base_set_size() const override;
- size_t query_set_size() const override;
-
- private:
- void load_base_set_() const override;
- void load_query_set_() const override;
- void load_gt_set_() const override;
- void map_base_set_() const override;
-
- mutable int dim_ = 0;
- mutable uint32_t max_k_ = 0;
- mutable size_t base_set_size_ = 0;
- mutable size_t query_set_size_ = 0;
-
- BinFile base_file_;
- BinFile query_file_;
- std::optional> gt_file_{std::nullopt};
-};
-
-template
-BinDataset::BinDataset(const std::string& name,
- const std::string& base_file,
- size_t subset_first_row,
- size_t subset_size,
- const std::string& query_file,
- const std::string& distance,
- const std::optional& groundtruth_neighbors_file)
- : Dataset(name, distance),
- base_file_(base_file, "r", subset_first_row, subset_size),
- query_file_(query_file, "r")
-{
- if (groundtruth_neighbors_file.has_value()) {
- gt_file_.emplace(groundtruth_neighbors_file.value(), "r");
- }
-}
-
-template
-int BinDataset::dim() const
-{
- if (dim_ > 0) { return dim_; }
- if (base_set_size() > 0) { return dim_; }
- if (query_set_size() > 0) { return dim_; }
- return dim_;
-}
-
-template
-uint32_t BinDataset::max_k() const
-{
- if (!this->gt_set_) { load_gt_set_(); }
- return max_k_;
-}
-
-template
-size_t BinDataset::query_set_size() const
-{
- if (query_set_size_ > 0) { return query_set_size_; }
- int dim;
- query_file_.get_shape(&query_set_size_, &dim);
- if (query_set_size_ == 0) { throw std::runtime_error("Zero query set size"); }
- if (dim == 0) { throw std::runtime_error("Zero query set dim"); }
- if (dim_ == 0) {
- dim_ = dim;
- } else if (dim_ != dim) {
- throw std::runtime_error("base set dim (" + std::to_string(dim_) + ") != query set dim (" +
- std::to_string(dim));
- }
- return query_set_size_;
-}
-
-template
-size_t BinDataset::base_set_size() const
-{
- if (base_set_size_ > 0) { return base_set_size_; }
- int dim;
- base_file_.get_shape(&base_set_size_, &dim);
- if (base_set_size_ == 0) { throw std::runtime_error("Zero base set size"); }
- if (dim == 0) { throw std::runtime_error("Zero base set dim"); }
- if (dim_ == 0) {
- dim_ = dim;
- } else if (dim_ != dim) {
- throw std::runtime_error("base set dim (" + std::to_string(dim) + ") != query set dim (" +
- std::to_string(dim_));
- }
- return base_set_size_;
-}
-
-template
-void BinDataset::load_base_set_() const
-{
- this->base_set_ = new T[base_set_size() * dim()];
- base_file_.read(this->base_set_);
-}
-
-template
-void BinDataset