diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 41b6a639d8..32aab5656b 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -52,6 +52,7 @@ jobs:
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
+ skip_upload_pkgs: libraft-template
docs-build:
if: github.ref_type == 'branch' && github.event_name == 'push'
needs: python-build
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 7606914589..d6e4ecb676 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -62,7 +62,7 @@ repos:
entry: ./cpp/scripts/run-cmake-format.sh cmake-format
language: python
types: [cmake]
- exclude: .*/thirdparty/.*
+ exclude: .*/thirdparty/.*|.*FindAVX.cmake.*
# Note that pre-commit autoupdate does not update the versions
# of dependencies, so we'll have to update this manually.
additional_dependencies:
@@ -101,7 +101,7 @@ repos:
args: ["--toml", "pyproject.toml"]
exclude: (?x)^(^CHANGELOG.md$)
- repo: https://github.com/rapidsai/dependency-file-generator
- rev: v1.4.0
+ rev: v1.5.1
hooks:
- id: rapids-dependency-file-generator
args: ["--clean"]
diff --git a/README.md b/README.md
index a178d90008..b77e906262 100755
--- a/README.md
+++ b/README.md
@@ -1,5 +1,7 @@
#
RAFT: Reusable Accelerated Functions and Tools
+![Navigating the canyons of accelerated possibilities](img/raft.png)
+
## Resources
- [RAFT Reference Documentation](https://docs.rapids.ai/api/raft/stable/): API Documentation.
@@ -32,12 +34,16 @@ While not exhaustive, the following general categories help summarize the accele
| **Tools & Utilities** | common utilities for developing CUDA applications, multi-node multi-gpu infrastructure |
-All of RAFT's C++ APIs can be accessed header-only and optional pre-compiled shared libraries can 1) speed up compile times and 2) enable the APIs to be used without CUDA-enabled compilers.
+RAFT is a C++ header-only template library with an optional shared library that
+1) can speed up compile times for common template types, and
+2) provides host-accessible "runtime" APIs, which don't require a CUDA compiler to use.
-In addition to the C++ library, RAFT also provides 2 Python libraries:
-- `pylibraft` - lightweight low-level Python wrappers around RAFT's host-accessible "runtime" APIs.
+In addition to being a C++ library, RAFT also provides 2 Python libraries:
+- `pylibraft` - lightweight Python wrappers around RAFT's host-accessible "runtime" APIs.
- `raft-dask` - multi-node multi-GPU communicator infrastructure for building distributed algorithms on the GPU with Dask.
+![RAFT is a C++ header-only template library with optional shared library and lightweight Python wrappers](img/arch.png)
+
## Getting started
### RAPIDS Memory Manager (RMM)
@@ -78,9 +84,9 @@ raft::device_resources handle;
int n_samples = 5000;
int n_features = 50;
-auto input = raft::make_device_matrix(handle, n_samples, n_features);
-auto labels = raft::make_device_vector(handle, n_samples);
-auto output = raft::make_device_matrix(handle, n_samples, n_samples);
+auto input = raft::make_device_matrix(handle, n_samples, n_features);
+auto labels = raft::make_device_vector(handle, n_samples);
+auto output = raft::make_device_matrix(handle, n_samples, n_samples);
raft::random::make_blobs(handle, input.view(), labels.view());
@@ -192,8 +198,7 @@ RAFT itself can be installed through conda, [CMake Package Manager (CPM)](https:
The easiest way to install RAFT is through conda and several packages are provided.
- `libraft-headers` RAFT headers
-- `libraft-nn` (optional) contains shared libraries for the nearest neighbors primitives.
-- `libraft-distance` (optional) contains shared libraries for distance primitives.
+- `libraft` (optional) shared library of pre-compiled template specializations and runtime APIs.
- `pylibraft` (optional) Python wrappers around RAFT algorithms and primitives.
- `raft-dask` (optional) enables deployment of multi-node multi-GPU algorithms that use RAFT `raft::comms` in Dask clusters.
@@ -202,73 +207,35 @@ Use the following command to install all of the RAFT packages with conda (replac
mamba install -c rapidsai -c conda-forge -c nvidia raft-dask pylibraft
```
-You can also install the `libraft-*` conda packages individually using the `mamba` command above.
+You can also install the conda packages individually using the `mamba` command above.
-After installing RAFT, `find_package(raft COMPONENTS nn distance)` can be used in your CUDA/C++ cmake build to compile and/or link against needed dependencies in your raft target. `COMPONENTS` are optional and will depend on the packages installed.
+After installing RAFT, `find_package(raft COMPONENTS compiled distributed)` can be used in your CUDA/C++ cmake build to compile and/or link against needed dependencies in your raft target. `COMPONENTS` are optional and will depend on the packages installed.
### Pip
pylibraft and raft-dask both have experimental packages that can be [installed through pip](https://rapids.ai/pip.html#install):
```bash
-pip install pylibraft-cu11 --extra-index-url=https://pypi.ngc.nvidia.com
-pip install raft-dask-cu11 --extra-index-url=https://pypi.ngc.nvidia.com
+pip install pylibraft-cu11 --extra-index-url=https://pypi.nvidia.com
+pip install raft-dask-cu11 --extra-index-url=https://pypi.nvidia.com
```
### CMake & CPM
-RAFT uses the [RAPIDS-CMake](https://github.com/rapidsai/rapids-cmake) library, which makes it simple to include in downstream cmake projects. RAPIDS CMake provides a convenience layer around CPM.
-
-After [installing](https://github.com/rapidsai/rapids-cmake#installation) rapids-cmake in your project, you can begin using RAFT by placing the code snippet below in a file named `get_raft.cmake` and including it in your cmake build with `include(get_raft.cmake)`. This will make available several targets to add to configure the link libraries for your artifacts.
-
-```cmake
-
-set(RAFT_VERSION "22.12")
-set(RAFT_FORK "rapidsai")
-set(RAFT_PINNED_TAG "branch-${RAFT_VERSION}")
-
-function(find_and_configure_raft)
- set(oneValueArgs VERSION FORK PINNED_TAG COMPILE_LIBRARIES)
- cmake_parse_arguments(PKG "${options}" "${oneValueArgs}"
- "${multiValueArgs}" ${ARGN} )
-
- #-----------------------------------------------------
- # Invoke CPM find_package()
- #-----------------------------------------------------
-
- rapids_cpm_find(raft ${PKG_VERSION}
- GLOBAL_TARGETS raft::raft
- BUILD_EXPORT_SET projname-exports
- INSTALL_EXPORT_SET projname-exports
- CPM_ARGS
- GIT_REPOSITORY https://github.com/${PKG_FORK}/raft.git
- GIT_TAG ${PKG_PINNED_TAG}
- SOURCE_SUBDIR cpp
- OPTIONS
- "BUILD_TESTS OFF"
- "BUILD_BENCH OFF"
- "RAFT_COMPILE_LIBRARIES ${PKG_COMPILE_LIBRARIES}"
- )
-
-endfunction()
-
-# Change pinned tag here to test a commit in CI
-# To use a different RAFT locally, set the CMake variable
-# CPM_raft_SOURCE=/path/to/local/raft
-find_and_configure_raft(VERSION ${RAFT_VERSION}.00
- FORK ${RAFT_FORK}
- PINNED_TAG ${RAFT_PINNED_TAG}
- COMPILE_LIBRARIES NO
-)
-```
+RAFT uses the [RAPIDS-CMake](https://github.com/rapidsai/rapids-cmake) library, which makes it easy to include in downstream cmake projects. RAPIDS-CMake provides a convenience layer around CPM. Please refer to [these instructions](https://github.com/rapidsai/rapids-cmake#installation) to install and use rapids-cmake in your project.
+
+#### Example Template Project
+
+You can find an [example RAFT project template](cpp/template/README.md) in the `cpp/template` directory, which demonstrates how to build a new application with RAFT or incorporate RAFT into an existing cmake project.
+
+#### CMake Targets
-Several CMake targets can be made available by adding components in the table below to the `RAFT_COMPONENTS` list above, separated by spaces. The `raft::raft` target will always be available. RAFT headers require, at a minimum, the CUDA toolkit libraries and RMM dependencies.
+Additional CMake targets can be made available by adding components in the table below to the `RAFT_COMPONENTS` list above, separated by spaces. The `raft::raft` target will always be available. RAFT headers require, at a minimum, the CUDA toolkit libraries and RMM dependencies.
-| Component | Target | Description | Base Dependencies |
-| --- | --- | --- | --- |
-| n/a | `raft::raft` | Full RAFT header library | CUDA toolkit library, RMM, Thrust (optional), NVTools (optional) |
-| distance | `raft::distance` | Pre-compiled template specializations for raft::distance | raft::raft, cuCollections (optional) |
-| nn | `raft::nn` | Pre-compiled template specializations for raft::neighbors | raft::raft, FAISS (optional) |
-| distributed | `raft::distributed` | No specializations | raft::raft, UCX, NCCL |
+| Component | Target | Description | Base Dependencies |
+|-------------|---------------------|-----------------------------------------------------------|---------------------------------------|
+| n/a | `raft::raft` | Full RAFT header library | CUDA toolkit, RMM, NVTX, CCCL, CUTLASS |
+| compiled | `raft::compiled` | Pre-compiled template specializations and runtime library | raft::raft |
+| distributed | `raft::distributed` | Dependencies for `raft::comms` APIs | raft::raft, UCX, NCCL |
### Source
@@ -279,7 +246,7 @@ mamba env create --name raft_dev_env -f conda/environments/all_cuda-118_arch-x86
mamba activate raft_dev_env
```
```
-./build.sh raft-dask pylibraft libraft tests bench --compile-libs
+./build.sh raft-dask pylibraft libraft tests bench --compile-lib
```
The [build](docs/source/build.md) instructions contain more details on building RAFT from source and including it in downstream projects. You can also find a more comprehensive version of the above CPM code snippet the [Building RAFT C++ from source](docs/source/build.md#building-raft-c-from-source-in-cmake) section of the build instructions.
@@ -316,6 +283,7 @@ The folder structure mirrors other RAPIDS repos, with the following folders:
- `internal`: A private header-only component that hosts the code shared between benchmarks and tests.
- `scripts`: Helpful scripts for development
- `src`: Compiled APIs and template specializations for the shared libraries
+ - `template`: A skeleton template containing the bare-bones file structure and cmake configuration for writing applications with RAFT.
- `test`: Googletests source code
- `docs`: Source code and scripts for building library documentation (Uses breath, doxygen, & pydocs)
- `python`: Source code for Python libraries.
diff --git a/build.sh b/build.sh
index 575f6bdaa1..7e1a3e7e36 100755
--- a/build.sh
+++ b/build.sh
@@ -2,7 +2,7 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
-# raft build script
+# raft build script
# This script is used to build the component(s) in this repo from
# source, and can be called with various options to customize the
@@ -15,11 +15,11 @@ NUMARGS=$#
ARGS=$*
# NOTE: ensure all dir changes are relative to the location of this
-# script, and that this script resides in the repo dir!
+# script, and that this script resides in the repo dir!
REPODIR=$(cd $(dirname $0); pwd)
-VALIDARGS="clean libraft pylibraft raft-dask docs tests bench clean --uninstall -v -g -n --compile-libs --compile-nn --compile-dist --allgpuarch --no-nvtx --show_depr_warn -h --buildfaiss --minimal-deps"
-HELP="$0 [ ...] [ ...] [--cmake-args=\"\"] [--cache-tool=] [--limit-tests=] [--limit-bench=]
+VALIDARGS="clean libraft pylibraft raft-dask docs tests template bench-prims bench-ann clean --uninstall -v -g -n --compile-lib --allgpuarch --no-nvtx --show_depr_warn --time -h"
+HELP="$0 [ ...] [ ...] [--cmake-args=\"\"] [--cache-tool=] [--limit-tests=] [--limit-bench-prims=] [--limit-bench-ann=]
where is:
clean - remove all existing build artifacts and configuration (start over)
libraft - build the raft C++ code only. Also builds the C-wrapper library
@@ -28,29 +28,28 @@ HELP="$0 [ ...] [ ...] [--cmake-args=\"\"] [--cache-tool= is:
-v - verbose build mode
-g - build for debug
-n - no install step
--uninstall - uninstall files for specified targets which were built and installed prior
- --compile-libs - compile shared libraries for all components
- --compile-nn - compile shared library for nn component
- --compile-dist - compile shared library for distance and current random components
- (eventually, this will be renamed to something more generic and
- the only option to be supported)
- --minimal-deps - disables dependencies like thrust so they can be overridden.
+ --compile-lib - compile shared libraries for all components
can be useful for a pure header-only install
--limit-tests - semicolon-separated list of test executables to compile (e.g. NEIGHBORS_TEST;CLUSTER_TEST)
- --limit-bench - semicolon-separated list of benchmark executables to compute (e.g. NEIGHBORS_BENCH;CLUSTER_BENCH)
+ --limit-bench-prims - semicolon-separated list of prims benchmark executables to compute (e.g. NEIGHBORS_PRIMS_BENCH;CLUSTER_PRIMS_BENCH)
+ --limit-bench-ann - semicolon-separated list of ann benchmark executables to compute (e.g. HNSWLIB_ANN_BENCH;RAFT_IVF_PQ_ANN_BENCH)
--allgpuarch - build for all supported GPU architectures
- --buildfaiss - build faiss statically into raft
--no-nvtx - disable nvtx (profiling markers), but allow enabling it in downstream projects
--show_depr_warn - show cmake deprecation warnings
--cmake-args=\\\"\\\" - pass arbitrary list of CMake configuration options (escape all quotes in argument)
--cache-tool= - pass the build cache tool (eg: ccache, sccache, distcc) that will be used
to speedup the build process.
+ --time - Enable nvcc compilation time logging into cpp/build/nvcc_compile_log.csv.
+ Results can be interpreted with cpp/scripts/analyze_nvcc_log.py
-h - print this text
default action (no args) is to build libraft, tests, pylibraft and raft-dask targets
@@ -68,20 +67,17 @@ VERBOSE_FLAG=""
BUILD_ALL_GPU_ARCH=0
BUILD_TESTS=OFF
BUILD_TYPE=Release
-BUILD_BENCH=OFF
-BUILD_STATIC_FAISS=OFF
-COMPILE_LIBRARIES=OFF
-COMPILE_NN_LIBRARY=OFF
-COMPILE_DIST_LIBRARY=OFF
-ENABLE_NN_DEPENDENCIES=OFF
+BUILD_PRIMS_BENCH=OFF
+BUILD_ANN_BENCH=OFF
+COMPILE_LIBRARY=OFF
INSTALL_TARGET=install
TEST_TARGETS="CLUSTER_TEST;CORE_TEST;DISTANCE_TEST;LABEL_TEST;LINALG_TEST;MATRIX_TEST;RANDOM_TEST;SOLVERS_TEST;SPARSE_TEST;SPARSE_DIST_TEST;SPARSE_NEIGHBORS_TEST;NEIGHBORS_TEST;STATS_TEST;UTILS_TEST"
BENCH_TARGETS="CLUSTER_BENCH;NEIGHBORS_BENCH;DISTANCE_BENCH;LINALG_BENCH;MATRIX_BENCH;SPARSE_BENCH;RANDOM_BENCH"
-ENABLE_thrust_DEPENDENCY=ON
CACHE_ARGS=""
NVTX=ON
+LOG_COMPILE_TIME=OFF
CLEAN=0
UNINSTALL=0
DISABLE_DEPRECATION_WARNINGS=ON
@@ -161,15 +157,30 @@ function limitTests {
function limitBench {
# Check for option to limit the set of test binaries to build
- if [[ -n $(echo $ARGS | { grep -E "\-\-limit\-bench" || true; } ) ]]; then
+ if [[ -n $(echo $ARGS | { grep -E "\-\-limit\-bench-prims" || true; } ) ]]; then
# There are possible weird edge cases that may cause this regex filter to output nothing and fail silently
# the true pipe will catch any weird edge cases that may happen and will cause the program to fall back
# on the invalid option error
- LIMIT_BENCH_TARGETS=$(echo $ARGS | sed -e 's/.*--limit-bench=//' -e 's/ .*//')
- if [[ -n ${LIMIT_BENCH_TARGETS} ]]; then
+ LIMIT_PRIMS_BENCH_TARGETS=$(echo $ARGS | sed -e 's/.*--limit-bench-prims=//' -e 's/ .*//')
+ if [[ -n ${LIMIT_PRIMS_BENCH_TARGETS} ]]; then
+ # Remove the full LIMIT_PRIMS_BENCH_TARGETS argument from list of args so that it passes validArgs function
+ ARGS=${ARGS//--limit-bench-prims=$LIMIT_PRIMS_BENCH_TARGETS/}
+ PRIMS_BENCH_TARGETS=${LIMIT_PRIMS_BENCH_TARGETS}
+ fi
+ fi
+}
+
+function limitAnnBench {
+ # Check for option to limit the set of ANN benchmark binaries to build
+ if [[ -n $(echo $ARGS | { grep -E "\-\-limit\-bench-ann" || true; } ) ]]; then
+ # There are possible weird edge cases that may cause this regex filter to output nothing and fail silently
+ # the true pipe will catch any weird edge cases that may happen and will cause the program to fall back
+ # on the invalid option error
+ LIMIT_ANN_BENCH_TARGETS=$(echo $ARGS | sed -e 's/.*--limit-bench-ann=//' -e 's/ .*//')
+ if [[ -n ${LIMIT_ANN_BENCH_TARGETS} ]]; then
# Remove the full LIMIT_TEST_TARGETS argument from list of args so that it passes validArgs function
- ARGS=${ARGS//--limit-bench=$LIMIT_BENCH_TARGETS/}
- BENCH_TARGETS=${LIMIT_BENCH_TARGETS}
+ ARGS=${ARGS//--limit-bench-ann=$LIMIT_ANN_BENCH_TARGETS/}
+ ANN_BENCH_TARGETS=${LIMIT_ANN_BENCH_TARGETS}
fi
fi
}
@@ -185,6 +196,7 @@ if (( ${NUMARGS} != 0 )); then
cacheTool
limitTests
limitBench
+ limitAnnBench
for a in ${ARGS}; do
if ! (echo " ${VALIDARGS} " | grep -q " ${a} "); then
echo "Invalid option: ${a}"
@@ -257,10 +269,6 @@ if hasArg -n; then
INSTALL_TARGET=""
fi
-if hasArg --minimal-deps; then
- ENABLE_thrust_DEPENDENCY=OFF
-fi
-
if hasArg -v; then
VERBOSE_FLAG="-v"
CMAKE_LOG_LEVEL="VERBOSE"
@@ -273,37 +281,16 @@ if hasArg --allgpuarch; then
BUILD_ALL_GPU_ARCH=1
fi
-if hasArg --compile-libs || (( ${NUMARGS} == 0 )); then
- COMPILE_LIBRARIES=ON
-fi
-
-if hasArg --compile-nn || hasArg --compile-libs || (( ${NUMARGS} == 0 )); then
- ENABLE_NN_DEPENDENCIES=ON
- COMPILE_NN_LIBRARY=ON
- CMAKE_TARGET="${CMAKE_TARGET};raft_nn_lib"
-fi
-
-if hasArg --compile-dist || hasArg --compile-libs || (( ${NUMARGS} == 0 )); then
- COMPILE_DIST_LIBRARY=ON
- CMAKE_TARGET="${CMAKE_TARGET};raft_distance_lib"
+if hasArg --compile-lib || (( ${NUMARGS} == 0 )); then
+ COMPILE_LIBRARY=ON
+ CMAKE_TARGET="${CMAKE_TARGET};raft_lib"
fi
if hasArg tests || (( ${NUMARGS} == 0 )); then
BUILD_TESTS=ON
CMAKE_TARGET="${CMAKE_TARGET};${TEST_TARGETS}"
- # Force compile nn library when needed test targets are specified
- if [[ $CMAKE_TARGET == *"CLUSTER_TEST"* || \
- $CMAKE_TARGET == *"SPARSE_DIST_TEST"* || \
- $CMAKE_TARGET == *"SPARSE_NEIGHBORS_TEST"* || \
- $CMAKE_TARGET == *"NEIGHBORS_TEST"* || \
- $CMAKE_TARGET == *"STATS_TEST"* ]]; then
- echo "-- Enabling nearest neighbors lib for gtests"
- ENABLE_NN_DEPENDENCIES=ON
- COMPILE_NN_LIBRARY=ON
- fi
-
- # Force compile distance library when needed test targets are specified
+ # Force compile library when needed test targets are specified
if [[ $CMAKE_TARGET == *"CLUSTER_TEST"* || \
$CMAKE_TARGET == *"DISTANCE_TEST"* || \
$CMAKE_TARGET == *"SPARSE_DIST_TEST" || \
@@ -311,39 +298,37 @@ if hasArg tests || (( ${NUMARGS} == 0 )); then
$CMAKE_TARGET == *"MATRIX_TEST"* || \
$CMAKE_TARGET == *"NEIGHBORS_TEST" || \
$CMAKE_TARGET == *"STATS_TEST"* ]]; then
- echo "-- Enabling distance lib for gtests"
- COMPILE_DIST_LIBRARY=ON
+ echo "-- Enabling compiled lib for gtests"
+ COMPILE_LIBRARY=ON
fi
fi
-if hasArg bench || (( ${NUMARGS} == 0 )); then
- BUILD_BENCH=ON
- CMAKE_TARGET="${CMAKE_TARGET};${BENCH_TARGETS}"
+if hasArg bench-prims || (( ${NUMARGS} == 0 )); then
+ BUILD_PRIMS_BENCH=ON
+ CMAKE_TARGET="${CMAKE_TARGET};${PRIMS_BENCH_TARGETS}"
- # Force compile nn library when needed benchmark targets are specified
- if [[ $CMAKE_TARGET == *"CLUSTER_BENCH"* || \
- $CMAKE_TARGET == *"NEIGHBORS_BENCH"* ]]; then
- echo "-- Enabling nearest neighbors lib for benchmarks"
- ENABLE_NN_DEPENDENCIES=ON
- COMPILE_NN_LIBRARY=ON
+ # Force compile library when needed benchmark targets are specified
+ if [[ $CMAKE_TARGET == *"CLUSTER_PRIMS_BENCH"* || \
+ $CMAKE_TARGET == *"MATRIX_PRIMS_BENCH"* || \
+ $CMAKE_TARGET == *"NEIGHBORS_PRIMS_BENCH"* ]]; then
+ echo "-- Enabling compiled lib for benchmarks"
+ COMPILE_LIBRARY=ON
fi
-
- # Force compile distance library when needed benchmark targets are specified
- if [[ $CMAKE_TARGET == *"CLUSTER_BENCH"* || \
- $CMAKE_TARGET == *"MATRIX_BENCH"* || \
- $CMAKE_TARGET == *"NEIGHBORS_BENCH"* ]]; then
- echo "-- Enabling distance lib for benchmarks"
- COMPILE_DIST_LIBRARY=ON
- fi
-
fi
-if hasArg --buildfaiss; then
- BUILD_STATIC_FAISS=ON
+if hasArg bench-ann || (( ${NUMARGS} == 0 )); then
+ BUILD_ANN_BENCH=ON
+ CMAKE_TARGET="${CMAKE_TARGET};${ANN_BENCH_TARGETS}"
+ COMPILE_LIBRARY=ON
fi
+
if hasArg --no-nvtx; then
NVTX=OFF
fi
+if hasArg --time; then
+ echo "-- Logging compile times to cpp/build/nvcc_compile_log.csv"
+ LOG_COMPILE_TIME=ON
+fi
if hasArg --show_depr_warn; then
DISABLE_DEPRECATION_WARNINGS=OFF
fi
@@ -351,8 +336,6 @@ if hasArg clean; then
CLEAN=1
fi
-
-
if [[ ${CMAKE_TARGET} == "" ]]; then
CMAKE_TARGET="all"
fi
@@ -386,7 +369,7 @@ fi
################################################################################
# Configure for building all C++ targets
-if (( ${NUMARGS} == 0 )) || hasArg libraft || hasArg docs || hasArg tests || hasArg bench; then
+if (( ${NUMARGS} == 0 )) || hasArg libraft || hasArg docs || hasArg tests || hasArg bench-prims || hasArg bench-ann; then
if (( ${BUILD_ALL_GPU_ARCH} == 0 )); then
RAFT_CMAKE_CUDA_ARCHITECTURES="NATIVE"
echo "Building for the architecture of the GPU in the system..."
@@ -401,17 +384,14 @@ if (( ${NUMARGS} == 0 )) || hasArg libraft || hasArg docs || hasArg tests || has
-DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \
-DCMAKE_CUDA_ARCHITECTURES=${RAFT_CMAKE_CUDA_ARCHITECTURES} \
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
- -DRAFT_COMPILE_LIBRARIES=${COMPILE_LIBRARIES} \
- -DRAFT_ENABLE_NN_DEPENDENCIES=${ENABLE_NN_DEPENDENCIES} \
+ -DRAFT_COMPILE_LIBRARY=${COMPILE_LIBRARY} \
-DRAFT_NVTX=${NVTX} \
+ -DCUDA_LOG_COMPILE_TIME=${LOG_COMPILE_TIME} \
-DDISABLE_DEPRECATION_WARNINGS=${DISABLE_DEPRECATION_WARNINGS} \
-DBUILD_TESTS=${BUILD_TESTS} \
- -DBUILD_BENCH=${BUILD_BENCH} \
+ -DBUILD_PRIMS_BENCH=${BUILD_PRIMS_BENCH} \
+ -DBUILD_ANN_BENCH=${BUILD_ANN_BENCH} \
-DCMAKE_MESSAGE_LOG_LEVEL=${CMAKE_LOG_LEVEL} \
- -DRAFT_COMPILE_NN_LIBRARY=${COMPILE_NN_LIBRARY} \
- -DRAFT_COMPILE_DIST_LIBRARY=${COMPILE_DIST_LIBRARY} \
- -DRAFT_USE_FAISS_STATIC=${BUILD_STATIC_FAISS} \
- -DRAFT_ENABLE_thrust_DEPENDENCY=${ENABLE_thrust_DEPENDENCY} \
${CACHE_ARGS} \
${EXTRA_CMAKE_ARGS}
@@ -425,34 +405,34 @@ if (( ${NUMARGS} == 0 )) || hasArg libraft || hasArg docs || hasArg tests || has
fi
fi
-# Build and (optionally) install the raft-dask Python package
-if (( ${NUMARGS} == 0 )) || hasArg raft-dask; then
+# Build and (optionally) install the pylibraft Python package
+if (( ${NUMARGS} == 0 )) || hasArg pylibraft; then
# Append `-DFIND_RAFT_CPP=ON` to EXTRA_CMAKE_ARGS unless a user specified the option.
if [[ "${EXTRA_CMAKE_ARGS}" != *"DFIND_RAFT_CPP"* ]]; then
EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DFIND_RAFT_CPP=ON"
fi
-
- cd ${REPODIR}/python/raft-dask
+ cd ${REPODIR}/python/pylibraft
python setup.py build_ext --inplace -- -DCMAKE_PREFIX_PATH="${RAFT_DASK_BUILD_DIR};${INSTALL_PREFIX}" -DCMAKE_LIBRARY_PATH=${LIBRAFT_BUILD_DIR} ${EXTRA_CMAKE_ARGS} -- -j${PARALLEL_LEVEL:-1}
if [[ ${INSTALL_TARGET} != "" ]]; then
python setup.py install --single-version-externally-managed --record=record.txt -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} ${EXTRA_CMAKE_ARGS}
fi
fi
-# Build and (optionally) install the pylibraft Python package
-if (( ${NUMARGS} == 0 )) || hasArg pylibraft; then
+# Build and (optionally) install the raft-dask Python package
+if (( ${NUMARGS} == 0 )) || hasArg raft-dask; then
# Append `-DFIND_RAFT_CPP=ON` to EXTRA_CMAKE_ARGS unless a user specified the option.
if [[ "${EXTRA_CMAKE_ARGS}" != *"DFIND_RAFT_CPP"* ]]; then
EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DFIND_RAFT_CPP=ON"
fi
- cd ${REPODIR}/python/pylibraft
+ cd ${REPODIR}/python/raft-dask
python setup.py build_ext --inplace -- -DCMAKE_PREFIX_PATH="${RAFT_DASK_BUILD_DIR};${INSTALL_PREFIX}" -DCMAKE_LIBRARY_PATH=${LIBRAFT_BUILD_DIR} ${EXTRA_CMAKE_ARGS} -- -j${PARALLEL_LEVEL:-1}
if [[ ${INSTALL_TARGET} != "" ]]; then
python setup.py install --single-version-externally-managed --record=record.txt -- -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} ${EXTRA_CMAKE_ARGS}
fi
fi
+
if hasArg docs; then
set -x
cd ${DOXYGEN_BUILD_DIR}
@@ -460,3 +440,12 @@ if hasArg docs; then
cd ${SPHINX_BUILD_DIR}
sphinx-build -b html source _html
fi
+
+################################################################################
+# Initiate build for example RAFT application template (if needed)
+
+if hasArg template; then
+ pushd cpp/template
+ ./build.sh
+ popd
+fi
diff --git a/ci/build_docs.sh b/ci/build_docs.sh
index 78c4399d28..5db6fa11be 100755
--- a/ci/build_docs.sh
+++ b/ci/build_docs.sh
@@ -24,9 +24,8 @@ VERSION_NUMBER=$(rapids-get-rapids-version-from-git)
rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
--channel "${PYTHON_CHANNEL}" \
- libraft-distance \
+ libraft \
libraft-headers \
- libraft-nn \
pylibraft \
raft-dask
diff --git a/ci/checks/copyright.py b/ci/checks/copyright.py
index a44314a6ce..123aeba87b 100644
--- a/ci/checks/copyright.py
+++ b/ci/checks/copyright.py
@@ -192,7 +192,8 @@ def checkCopyright_main():
action="append",
required=False,
default=["python/cuml/_thirdparty/",
- "cpp/include/raft/thirdparty/"],
+ "cpp/include/raft/thirdparty/",
+ "cpp/cmake/modules/FindAVX.cmake"],
help=("Exclude the paths specified (regexp). "
"Can be specified multiple times."))
diff --git a/ci/release/apply_wheel_modifications.sh b/ci/release/apply_wheel_modifications.sh
index ed3d2a15fd..efc8f0c77c 100755
--- a/ci/release/apply_wheel_modifications.sh
+++ b/ci/release/apply_wheel_modifications.sh
@@ -6,10 +6,6 @@
VERSION=${1}
CUDA_SUFFIX=${2}
-# __init__.py versions
-sed -i "s/__version__ = .*/__version__ = \"${VERSION}\"/g" python/pylibraft/pylibraft/__init__.py
-sed -i "s/__version__ = .*/__version__ = \"${VERSION}\"/g" python/raft-dask/raft_dask/__init__.py
-
# pyproject.toml versions
sed -i "s/^version = .*/version = \"${VERSION}\"/g" python/pylibraft/pyproject.toml
sed -i "s/^version = .*/version = \"${VERSION}\"/g" python/raft-dask/pyproject.toml
diff --git a/ci/test_cpp.sh b/ci/test_cpp.sh
index 44e446d8f6..e32697a68a 100755
--- a/ci/test_cpp.sh
+++ b/ci/test_cpp.sh
@@ -26,7 +26,7 @@ rapids-print-env
rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
- libraft-headers libraft-distance libraft-nn libraft-tests
+ libraft-headers libraft libraft-tests
rapids-logger "Check GPU usage"
nvidia-smi
diff --git a/ci/test_python.sh b/ci/test_python.sh
index 934c9c6951..cb6b7631e4 100755
--- a/ci/test_python.sh
+++ b/ci/test_python.sh
@@ -31,7 +31,7 @@ rapids-print-env
rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
--channel "${PYTHON_CHANNEL}" \
- libraft-distance libraft-headers pylibraft raft-dask
+ libraft libraft-headers pylibraft raft-dask
rapids-logger "Check GPU usage"
nvidia-smi
diff --git a/ci/wheel_smoke_test_raft_dask.py b/ci/wheel_smoke_test_raft_dask.py
index 32c13e61ca..5709ac901c 100644
--- a/ci/wheel_smoke_test_raft_dask.py
+++ b/ci/wheel_smoke_test_raft_dask.py
@@ -1,4 +1,19 @@
-from dask.distributed import Client, wait
+# Copyright (c) 2019-2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from dask.distributed import Client, get_worker, wait
from dask_cuda import LocalCUDACluster, initialize
from raft_dask.common import (
@@ -23,12 +38,12 @@
def func_test_send_recv(sessionId, n_trials):
- handle = local_handle(sessionId)
+ handle = local_handle(sessionId, dask_worker=get_worker())
return perform_test_comms_send_recv(handle, n_trials)
def func_test_collective(func, sessionId, root):
- handle = local_handle(sessionId)
+ handle = local_handle(sessionId, dask_worker=get_worker())
return func(handle, root)
diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index 47af29d9d2..1afebc98e6 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -18,14 +18,14 @@ dependencies:
- cupy
- cxx-compiler
- cython>=0.29,<0.30
-- dask-cuda=23.04
+- dask-cuda==23.4.*
- dask>=2023.1.1
- distributed>=2023.1.1
- doxygen>=1.8.20
-- faiss-proc=*=cuda
- gcc_linux-64=11.*
- graphviz
- ipython
+- joblib>=0.11
- libcublas-dev=11.11.3.6
- libcublas=11.11.3.6
- libcurand-dev=10.3.0.86
@@ -34,14 +34,16 @@ dependencies:
- libcusolver=11.4.1.48
- libcusparse-dev=11.7.5.86
- libcusparse=11.7.5.86
-- libfaiss>=1.7.1=cuda*
+- nccl>=2.9.9
- ninja
+- numba>=0.49
+- numpy>=1.21
- numpydoc
- pydata-sphinx-theme
- pytest
- pytest-cov
- recommonmark
-- rmm=23.04
+- rmm==23.4.*
- scikit-build>=0.13.1
- scikit-learn
- scipy
@@ -49,6 +51,6 @@ dependencies:
- sphinx-markdown-tables
- sysroot_linux-64==2.17
- ucx-proc=*=gpu
-- ucx-py=0.31.*
+- ucx-py==0.31.*
- ucx>=1.13.0
name: all_cuda-118_arch-x86_64
diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml
new file mode 100644
index 0000000000..5965aaef8f
--- /dev/null
+++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml
@@ -0,0 +1,37 @@
+# This file is generated by `rapids-dependency-file-generator`.
+# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
+channels:
+- rapidsai
+- rapidsai-nightly
+- dask/label/dev
+- conda-forge
+- nvidia
+dependencies:
+- c-compiler
+- clang-tools=11.1.0
+- clang=11.1.0
+- cmake>=3.23.1,!=3.25.0
+- cuda-profiler-api=11.8.86
+- cudatoolkit=11.8
+- cxx-compiler
+- cython>=0.29,<0.30
+- faiss-proc=*=cuda
+- gcc_linux-64=11.*
+- glog>=0.6.0
+- h5py>=3.8.0
+- hnswlib=0.7.0
+- libcublas-dev=11.11.3.6
+- libcublas=11.11.3.6
+- libcurand-dev=10.3.0.86
+- libcurand=10.3.0.86
+- libcusolver-dev=11.4.1.48
+- libcusolver=11.4.1.48
+- libcusparse-dev=11.7.5.86
+- libcusparse=11.7.5.86
+- libfaiss>=1.7.1
+- nccl>=2.9.9
+- ninja
+- nlohmann_json>=3.11.2
+- scikit-build>=0.13.1
+- sysroot_linux-64==2.17
+name: bench_ann_cuda-118_arch-x86_64
diff --git a/conda/recipes/libraft/build_libraft_nn.sh b/conda/recipes/libraft/build_libraft.sh
similarity index 54%
rename from conda/recipes/libraft/build_libraft_nn.sh
rename to conda/recipes/libraft/build_libraft.sh
index 5347bfbc20..237e47eb26 100644
--- a/conda/recipes/libraft/build_libraft_nn.sh
+++ b/conda/recipes/libraft/build_libraft.sh
@@ -1,4 +1,4 @@
#!/usr/bin/env bash
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
-./build.sh libraft --allgpuarch --compile-nn --no-nvtx
+./build.sh libraft --allgpuarch --compile-lib --no-nvtx
diff --git a/conda/recipes/libraft/build_libraft_distance.sh b/conda/recipes/libraft/build_libraft_distance.sh
deleted file mode 100644
index 27a1ee43c8..0000000000
--- a/conda/recipes/libraft/build_libraft_distance.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/usr/bin/env bash
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
-
-./build.sh libraft --allgpuarch --compile-dist --no-nvtx
diff --git a/conda/recipes/libraft/build_libraft_nn_bench.sh b/conda/recipes/libraft/build_libraft_nn_bench.sh
new file mode 100644
index 0000000000..dc6250f0f4
--- /dev/null
+++ b/conda/recipes/libraft/build_libraft_nn_bench.sh
@@ -0,0 +1,5 @@
+#!/usr/bin/env bash
+# Copyright (c) 2023, NVIDIA CORPORATION.
+
+./build.sh tests bench-ann --allgpuarch --no-nvtx
+cmake --install cpp/build --component ann_bench
diff --git a/conda/recipes/libraft/build_libraft_template.sh b/conda/recipes/libraft/build_libraft_template.sh
new file mode 100644
index 0000000000..9759402884
--- /dev/null
+++ b/conda/recipes/libraft/build_libraft_template.sh
@@ -0,0 +1,5 @@
+#!/usr/bin/env bash
+# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+
+# Just building template so we verify it uses libraft.so and fail if it doesn't build
+./build.sh template
\ No newline at end of file
diff --git a/conda/recipes/libraft/build_libraft_tests.sh b/conda/recipes/libraft/build_libraft_tests.sh
index aa2c1b3e89..cc28f93fb8 100644
--- a/conda/recipes/libraft/build_libraft_tests.sh
+++ b/conda/recipes/libraft/build_libraft_tests.sh
@@ -1,5 +1,5 @@
#!/usr/bin/env bash
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
-./build.sh tests bench --allgpuarch --no-nvtx
+./build.sh tests bench-prims --allgpuarch --no-nvtx
cmake --install cpp/build --component testing
diff --git a/conda/recipes/libraft/conda_build_config.yaml b/conda/recipes/libraft/conda_build_config.yaml
index ca213dc317..2a66f213a7 100644
--- a/conda/recipes/libraft/conda_build_config.yaml
+++ b/conda/recipes/libraft/conda_build_config.yaml
@@ -19,8 +19,17 @@ nccl_version:
gtest_version:
- "=1.10.0"
-libfaiss_version:
- - "1.7.2 *_cuda"
+glog_version:
+ - ">=0.6.0"
+
+faiss_version:
+ - ">=1.7.1"
+
+h5py_version:
+ - ">=3.8.0"
+
+nlohmann_json_version:
+ - ">=3.11.2"
# The CTK libraries below are missing from the conda-forge::cudatoolkit
# package. The "*_host_*" version specifiers correspond to `11.8` packages and the
diff --git a/conda/recipes/libraft/meta.yaml b/conda/recipes/libraft/meta.yaml
index 771c7d55b8..7859807777 100644
--- a/conda/recipes/libraft/meta.yaml
+++ b/conda/recipes/libraft/meta.yaml
@@ -76,9 +76,9 @@ outputs:
home: https://rapids.ai/
license: Apache-2.0
summary: libraft-headers library
- - name: libraft-distance
+ - name: libraft
version: {{ version }}
- script: build_libraft_distance.sh
+ script: build_libraft.sh
build:
script_env: *script_env
number: {{ GIT_DESCRIBE_NUMBER }}
@@ -109,10 +109,10 @@ outputs:
about:
home: https://rapids.ai/
license: Apache-2.0
- summary: libraft-distance library
- - name: libraft-nn
+ summary: libraft library
+ - name: libraft-tests
version: {{ version }}
- script: build_libraft_nn.sh
+ script: build_libraft_tests.sh
build:
script_env: *script_env
number: {{ GIT_DESCRIBE_NUMBER }}
@@ -128,10 +128,11 @@ outputs:
- ninja
- sysroot_{{ target_platform }} {{ sysroot_version }}
host:
+ - {{ pin_subpackage('libraft', exact=True) }}
- {{ pin_subpackage('libraft-headers', exact=True) }}
- cuda-profiler-api {{ cuda_profiler_api_host_version }}
- - faiss-proc=*=cuda
- - lapack
+ - gmock {{ gtest_version }}
+ - gtest {{ gtest_version }}
- libcublas {{ libcublas_host_version }}
- libcublas-dev {{ libcublas_host_version }}
- libcurand {{ libcurand_host_version }}
@@ -140,18 +141,18 @@ outputs:
- libcusolver-dev {{ libcusolver_host_version }}
- libcusparse {{ libcusparse_host_version }}
- libcusparse-dev {{ libcusparse_host_version }}
- - libfaiss {{ libfaiss_version }}
run:
- - faiss-proc=*=cuda
- - libfaiss {{ libfaiss_version }}
+ - {{ pin_subpackage('libraft', exact=True) }}
- {{ pin_subpackage('libraft-headers', exact=True) }}
+ - gmock {{ gtest_version }}
+ - gtest {{ gtest_version }}
about:
home: https://rapids.ai/
license: Apache-2.0
- summary: libraft-nn library
- - name: libraft-tests
+ summary: libraft tests
+ - name: libraft-template
version: {{ version }}
- script: build_libraft_tests.sh
+ script: build_libraft_template.sh
build:
script_env: *script_env
number: {{ GIT_DESCRIBE_NUMBER }}
@@ -167,12 +168,9 @@ outputs:
- ninja
- sysroot_{{ target_platform }} {{ sysroot_version }}
host:
- - {{ pin_subpackage('libraft-distance', exact=True) }}
+ - {{ pin_subpackage('libraft', exact=True) }}
- {{ pin_subpackage('libraft-headers', exact=True) }}
- - {{ pin_subpackage('libraft-nn', exact=True) }}
- cuda-profiler-api {{ cuda_profiler_api_host_version }}
- - gmock {{ gtest_version }}
- - gtest {{ gtest_version }}
- libcublas {{ libcublas_host_version }}
- libcublas-dev {{ libcublas_host_version }}
- libcurand {{ libcurand_host_version }}
@@ -182,12 +180,53 @@ outputs:
- libcusparse {{ libcusparse_host_version }}
- libcusparse-dev {{ libcusparse_host_version }}
run:
- - {{ pin_subpackage('libraft-distance', exact=True) }}
+ - {{ pin_subpackage('libraft', exact=True) }}
- {{ pin_subpackage('libraft-headers', exact=True) }}
- - {{ pin_subpackage('libraft-nn', exact=True) }}
- - gmock {{ gtest_version }}
- - gtest {{ gtest_version }}
about:
home: https://rapids.ai/
license: Apache-2.0
- summary: libraft tests
+ summary: libraft template
+ - name: libraft-ann-bench
+ version: {{ version }}
+ script: build_libraft_nn_bench.sh
+ build:
+ script_env: *script_env
+ number: {{ GIT_DESCRIBE_NUMBER }}
+ string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
+ ignore_run_exports_from:
+ - {{ compiler('cuda') }}
+ requirements:
+ build:
+ - {{ compiler('c') }}
+ - {{ compiler('cuda') }} {{ cuda_version }}
+ - {{ compiler('cxx') }}
+ - cmake {{ cmake_version }}
+ - ninja
+ - sysroot_{{ target_platform }} {{ sysroot_version }}
+ host:
+ - {{ pin_subpackage('libraft', exact=True) }}
+ - {{ pin_subpackage('libraft-headers', exact=True) }}
+ - cuda-profiler-api {{ cuda_profiler_api_host_version }}
+ - libcublas {{ libcublas_host_version }}
+ - libcublas-dev {{ libcublas_host_version }}
+ - libcurand {{ libcurand_host_version }}
+ - libcurand-dev {{ libcurand_host_version }}
+ - libcusolver {{ libcusolver_host_version }}
+ - libcusolver-dev {{ libcusolver_host_version }}
+ - libcusparse {{ libcusparse_host_version }}
+ - libcusparse-dev {{ libcusparse_host_version }}
+ - glog {{ glog_version }}
+ - nlohmann_json {{ nlohmann_json_version }}
+ - libfaiss {{ faiss_version }}
+ - faiss-proc=*=cuda
+ run:
+ - {{ pin_subpackage('libraft', exact=True) }}
+ - {{ pin_subpackage('libraft-headers', exact=True) }}
+ - glog {{ glog_version }}
+ - faiss-proc=*=cuda
+ - libfaiss {{ faiss_version }}
+ - h5py {{ h5py_version }}
+ about:
+ home: https://rapids.ai/
+ license: Apache-2.0
+ summary: libraft ann bench
diff --git a/conda/recipes/pylibraft/meta.yaml b/conda/recipes/pylibraft/meta.yaml
index 4a9b98ac75..7730801801 100644
--- a/conda/recipes/pylibraft/meta.yaml
+++ b/conda/recipes/pylibraft/meta.yaml
@@ -34,8 +34,9 @@ requirements:
- cuda-python >=11.7.1,<12.0
- cudatoolkit ={{ cuda_version }}
- cython >=0.29,<0.30
- - libraft-distance {{ version }}
+ - libraft {{ version }}
- libraft-headers {{ version }}
+ - numpy >=1.21
- python x.x
- rmm ={{ minor_version }}
- scikit-build >=0.13.1
@@ -43,7 +44,7 @@ requirements:
run:
- {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }}
- cuda-python >=11.7.1,<12.0
- - libraft-distance {{ version }}
+ - libraft {{ version }}
- libraft-headers {{ version }}
- python x.x
diff --git a/conda/recipes/raft-dask/conda_build_config.yaml b/conda/recipes/raft-dask/conda_build_config.yaml
index ef22522116..778b187870 100644
--- a/conda/recipes/raft-dask/conda_build_config.yaml
+++ b/conda/recipes/raft-dask/conda_build_config.yaml
@@ -11,7 +11,7 @@ sysroot_version:
- "2.17"
ucx_version:
- - "1.13.0"
+ - ">=1.13.0,<1.15.0"
ucx_py_version:
- "0.31.*"
diff --git a/conda/recipes/raft-dask/meta.yaml b/conda/recipes/raft-dask/meta.yaml
index b387f0f47c..59a67fe148 100644
--- a/conda/recipes/raft-dask/meta.yaml
+++ b/conda/recipes/raft-dask/meta.yaml
@@ -54,7 +54,7 @@ requirements:
- pylibraft {{ version }}
- python x.x
- rmm ={{ minor_version }}
- - ucx >={{ ucx_version }}
+ - ucx {{ ucx_version }}
- ucx-proc=*=gpu
- ucx-py {{ ucx_py_version }}
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 2999045a0c..2e9c726b8e 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -46,63 +46,48 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
option(BUILD_SHARED_LIBS "Build raft shared libraries" ON)
option(BUILD_TESTS "Build raft unit-tests" ON)
-option(BUILD_BENCH "Build raft C++ benchmark tests" OFF)
+option(BUILD_PRIMS_BENCH "Build raft C++ benchmark tests" OFF)
+option(BUILD_ANN_BENCH "Build raft ann benchmarks" OFF)
option(CUDA_ENABLE_KERNELINFO "Enable kernel resource usage info" OFF)
option(CUDA_ENABLE_LINEINFO
"Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler)" OFF
)
option(CUDA_STATIC_RUNTIME "Statically link the CUDA toolkit runtime and libraries" OFF)
+option(CUDA_LOG_COMPILE_TIME "Write a log of compilation times to nvcc_compile_log.csv" OFF)
option(DETECT_CONDA_ENV "Enable detection of conda environment for dependencies" ON)
option(DISABLE_DEPRECATION_WARNINGS "Disable deprecaction warnings " ON)
option(DISABLE_OPENMP "Disable OpenMP" OFF)
option(RAFT_NVTX "Enable nvtx markers" OFF)
-set(RAFT_COMPILE_LIBRARIES_DEFAULT OFF)
-if(BUILD_TESTS OR BUILD_BENCH)
- set(RAFT_COMPILE_LIBRARIES_DEFAULT ON)
-endif()
-option(RAFT_COMPILE_LIBRARIES "Enable building raft shared library instantiations"
- ${RAFT_COMPILE_LIBRARIES_DEFAULT}
-)
-option(
- RAFT_COMPILE_NN_LIBRARY "Enable building raft nearest neighbors shared library instantiations"
- ${RAFT_COMPILE_LIBRARIES}
-)
-option(RAFT_COMPILE_DIST_LIBRARY "Enable building raft distant shared library instantiations"
- ${RAFT_COMPILE_LIBRARIES}
+set(RAFT_COMPILE_LIBRARY_DEFAULT OFF)
+if(BUILD_TESTS
+ OR BUILD_PRIMS_BENCH
+ OR BUILD_ANN_BENCH
)
-option(RAFT_ENABLE_NN_DEPENDENCIES "Search for raft::nn dependencies like faiss"
- ${RAFT_COMPILE_NN_LIBRARY}
+ set(RAFT_COMPILE_LIBRARY_DEFAULT ON)
+endif()
+option(RAFT_COMPILE_LIBRARY "Enable building raft shared library instantiations"
+ ${RAFT_COMPILE_LIBRARY_DEFAULT}
)
-option(RAFT_ENABLE_thrust_DEPENDENCY "Enable Thrust dependency" ON)
-
-if(BUILD_TESTS OR BUILD_BENCH)
+if(BUILD_TESTS
+ OR BUILD_PRIMS_BENCH
+ OR BUILD_ANN_BENCH
+)
# Needed because GoogleBenchmark changes the state of FindThreads.cmake, causing subsequent runs
# to have different values for the `Threads::Threads` target. Setting this flag ensures
# `Threads::Threads` is the same value in first run and subsequent runs.
set(THREADS_PREFER_PTHREAD_FLAG ON)
endif()
-if(BUILD_TESTS AND NOT RAFT_ENABLE_thrust_DEPENDENCY)
- message(VERBOSE "RAFT: BUILD_TESTS is enabled, overriding RAFT_ENABLE_thrust_DEPENDENCY")
- set(RAFT_ENABLE_thrust_DEPENDENCY ON)
-endif()
-
-option(RAFT_EXCLUDE_FAISS_FROM_ALL "Exclude FAISS targets from RAFT's 'all' target" ON)
-
include(CMakeDependentOption)
-cmake_dependent_option(
- RAFT_USE_FAISS_STATIC
- "Build and statically link the FAISS library for nearest neighbors search on GPU"
- ON
- RAFT_COMPILE_LIBRARIES
- OFF
-)
+# cmake_dependent_option( RAFT_USE_FAISS_STATIC "Build and statically link the FAISS library for
+# nearest neighbors search on GPU" ON RAFT_COMPILE_LIBRARY OFF )
message(VERBOSE "RAFT: Building optional components: ${raft_FIND_COMPONENTS}")
message(VERBOSE "RAFT: Build RAFT unit-tests: ${BUILD_TESTS}")
-message(VERBOSE "RAFT: Building raft C++ benchmarks: ${BUILD_BENCH}")
+message(VERBOSE "RAFT: Building raft C++ benchmarks: ${BUILD_PRIMS_BENCH}")
+message(VERBOSE "RAFT: Building ANN benchmarks: ${BUILD_ANN_BENCH}")
message(VERBOSE "RAFT: Enable detection of conda environment for dependencies: ${DETECT_CONDA_ENV}")
message(VERBOSE "RAFT: Disable depreaction warnings " ${DISABLE_DEPRECATION_WARNINGS})
message(VERBOSE "RAFT: Disable OpenMP: ${DISABLE_OPENMP}")
@@ -168,36 +152,22 @@ include(cmake/modules/ConfigureCUDA.cmake)
# ##################################################################################################
# * Requirements -------------------------------------------------------------
-if(RAFT_COMPILE_LIBRARIES)
- set(RAFT_COMPILE_DIST_LIBRARY ON)
- set(RAFT_COMPILE_NN_LIBRARY ON)
-endif()
-
-if(RAFT_COMPILE_DIST_LIBRARY OR distance IN_LIST raft_FIND_COMPONENTS)
- set(RAFT_ENABLE_cuco_DEPENDENCY ON)
-endif()
-
# add third party dependencies using CPM
rapids_cpm_init()
# thrust before rmm/cuco so we get the right version of thrust/cub
include(cmake/thirdparty/get_thrust.cmake)
include(cmake/thirdparty/get_rmm.cmake)
-include(cmake/thirdparty/get_faiss.cmake)
include(cmake/thirdparty/get_cutlass.cmake)
-if(RAFT_ENABLE_cuco_DEPENDENCY)
- include(${rapids-cmake-dir}/cpm/cuco.cmake)
- rapids_cpm_cuco(
- BUILD_EXPORT_SET raft-distance-lib-exports INSTALL_EXPORT_SET raft-distance-lib-exports
- )
-endif()
+include(${rapids-cmake-dir}/cpm/cuco.cmake)
+rapids_cpm_cuco(BUILD_EXPORT_SET raft-exports INSTALL_EXPORT_SET raft-exports)
if(BUILD_TESTS)
include(cmake/thirdparty/get_gtest.cmake)
endif()
-if(BUILD_BENCH)
+if(BUILD_PRIMS_BENCH)
include(${rapids-cmake-dir}/cpm/gbench.cmake)
rapids_cpm_gbench()
endif()
@@ -215,11 +185,13 @@ target_include_directories(
target_link_libraries(
raft
INTERFACE rmm::rmm
+ cuco::cuco
+ nvidia::cutlass::cutlass
CUDA::cublas${_ctk_static_suffix}
CUDA::curand${_ctk_static_suffix}
CUDA::cusolver${_ctk_static_suffix}
CUDA::cusparse${_ctk_static_suffix}
- $<$:raft::Thrust>
+ raft::Thrust
)
target_compile_features(raft INTERFACE cxx_std_17 $)
@@ -237,7 +209,7 @@ else()
target_compile_definitions(raft INTERFACE RAFT_SYSTEM_LITTLE_ENDIAN=1)
endif()
-if(RAFT_COMPILE_DIST_LIBRARY OR RAFT_COMPILE_NN_LIBRARY)
+if(RAFT_COMPILE_LIBRARY)
file(
WRITE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld"
[=[
@@ -281,148 +253,160 @@ target_compile_definitions(raft::raft INTERFACE $<$:NVTX_ENAB
endif()
# ##################################################################################################
-# * raft_distance ------------------------------------------------------------ TODO: Currently, this
+# * raft_compiled ------------------------------------------------------------ TODO: Currently, this
# package also contains the 'random' namespace (for rmat logic) We couldn't get this to work
# properly due to strange CI failures as noticed in the PR#778. In the long term, we should rename
# this package to `raft_compiled` in order to have a single pre-compiled raft package for those
# who need it.
-add_library(raft_distance INTERFACE)
+add_library(raft_compiled INTERFACE)
-if(TARGET raft_distance AND (NOT TARGET raft::distance))
- add_library(raft::distance ALIAS raft_distance)
+if(TARGET raft_compiled AND (NOT TARGET raft::compiled))
+ add_library(raft::compiled ALIAS raft_compiled)
endif()
-set_target_properties(raft_distance PROPERTIES EXPORT_NAME distance)
+set_target_properties(raft_compiled PROPERTIES EXPORT_NAME compiled)
-if(RAFT_COMPILE_DIST_LIBRARY)
+if(RAFT_COMPILE_LIBRARY)
add_library(
- raft_distance_lib
- src/distance/distance/pairwise_distance.cu
- src/distance/distance/fused_l2_min_arg.cu
- src/distance/cluster/update_centroids_float.cu
- src/distance/cluster/update_centroids_double.cu
- src/distance/cluster/cluster_cost_float.cu
- src/distance/cluster/cluster_cost_double.cu
- src/distance/neighbors/refine_d_int64_t_float.cu
- src/distance/neighbors/refine_d_int64_t_int8_t.cu
- src/distance/neighbors/refine_d_int64_t_uint8_t.cu
- src/distance/neighbors/refine_h_int64_t_float.cu
- src/distance/neighbors/refine_h_int64_t_int8_t.cu
- src/distance/neighbors/refine_h_int64_t_uint8_t.cu
- src/distance/neighbors/specializations/refine_d_int64_t_float.cu
- src/distance/neighbors/specializations/refine_d_int64_t_int8_t.cu
- src/distance/neighbors/specializations/refine_d_int64_t_uint8_t.cu
- src/distance/neighbors/specializations/refine_h_int64_t_float.cu
- src/distance/neighbors/specializations/refine_h_int64_t_int8_t.cu
- src/distance/neighbors/specializations/refine_h_int64_t_uint8_t.cu
- src/distance/cluster/kmeans_fit_float.cu
- src/distance/cluster/kmeans_fit_double.cu
- src/distance/cluster/kmeans_init_plus_plus_double.cu
- src/distance/cluster/kmeans_init_plus_plus_float.cu
- src/distance/distance/specializations/detail/canberra_double_double_double_int.cu
- src/distance/distance/specializations/detail/canberra_float_float_float_int.cu
- src/distance/distance/specializations/detail/correlation_double_double_double_int.cu
- src/distance/distance/specializations/detail/correlation_float_float_float_int.cu
- src/distance/distance/specializations/detail/cosine_double_double_double_int.cu
- src/distance/distance/specializations/detail/cosine_float_float_float_int.cu
- src/distance/distance/specializations/detail/hamming_unexpanded_double_double_double_int.cu
- src/distance/distance/specializations/detail/hamming_unexpanded_float_float_float_int.cu
- src/distance/distance/specializations/detail/hellinger_expanded_float_float_float_int.cu
- src/distance/distance/specializations/detail/hellinger_expanded_double_double_double_int.cu
- src/distance/distance/specializations/detail/inner_product_float_float_float_int.cu
- src/distance/distance/specializations/detail/inner_product_double_double_double_int.cu
- src/distance/distance/specializations/detail/jensen_shannon_float_float_float_int.cu
- src/distance/distance/specializations/detail/jensen_shannon_double_double_double_int.cu
- src/distance/distance/specializations/detail/kernels/gram_matrix_base_double.cu
- src/distance/distance/specializations/detail/kernels/gram_matrix_base_float.cu
- src/distance/distance/specializations/detail/kernels/polynomial_kernel_double_int.cu
- src/distance/distance/specializations/detail/kernels/polynomial_kernel_float_int.cu
+ raft_lib
+ src/distance/pairwise_distance.cu
+ src/distance/fused_l2_min_arg.cu
+ src/cluster/update_centroids_float.cu
+ src/cluster/update_centroids_double.cu
+ src/cluster/cluster_cost_float.cu
+ src/cluster/cluster_cost_double.cu
+ src/neighbors/refine_d_int64_t_float.cu
+ src/neighbors/refine_d_int64_t_int8_t.cu
+ src/neighbors/refine_d_int64_t_uint8_t.cu
+ src/neighbors/refine_h_int64_t_float.cu
+ src/neighbors/refine_h_int64_t_int8_t.cu
+ src/neighbors/refine_h_int64_t_uint8_t.cu
+ src/neighbors/specializations/refine_d_int64_t_float.cu
+ src/neighbors/specializations/refine_d_int64_t_int8_t.cu
+ src/neighbors/specializations/refine_d_int64_t_uint8_t.cu
+ src/neighbors/specializations/refine_h_int64_t_float.cu
+ src/neighbors/specializations/refine_h_int64_t_int8_t.cu
+ src/neighbors/specializations/refine_h_int64_t_uint8_t.cu
+ src/cluster/kmeans_fit_float.cu
+ src/cluster/kmeans_fit_double.cu
+ src/cluster/kmeans_init_plus_plus_double.cu
+ src/cluster/kmeans_init_plus_plus_float.cu
+ src/distance/specializations/detail/canberra_double_double_double_int.cu
+ src/distance/specializations/detail/canberra_float_float_float_int.cu
+ src/distance/specializations/detail/correlation_double_double_double_int.cu
+ src/distance/specializations/detail/correlation_float_float_float_int.cu
+ src/distance/specializations/detail/cosine_double_double_double_int.cu
+ src/distance/specializations/detail/cosine_float_float_float_int.cu
+ src/distance/specializations/detail/hamming_unexpanded_double_double_double_int.cu
+ src/distance/specializations/detail/hamming_unexpanded_float_float_float_int.cu
+ src/distance/specializations/detail/hellinger_expanded_float_float_float_int.cu
+ src/distance/specializations/detail/hellinger_expanded_double_double_double_int.cu
+ src/distance/specializations/detail/inner_product_float_float_float_int.cu
+ src/distance/specializations/detail/inner_product_double_double_double_int.cu
+ src/distance/specializations/detail/jensen_shannon_float_float_float_int.cu
+ src/distance/specializations/detail/jensen_shannon_double_double_double_int.cu
+ src/distance/specializations/detail/kernels/gram_matrix_base_double.cu
+ src/distance/specializations/detail/kernels/gram_matrix_base_float.cu
+ src/distance/specializations/detail/kernels/polynomial_kernel_double_int.cu
+ src/distance/specializations/detail/kernels/polynomial_kernel_float_int.cu
# These are somehow missing a kernel definition which is causing a compile error.
# src/distance/specializations/detail/kernels/rbf_kernel_double.cu
# src/distance/specializations/detail/kernels/rbf_kernel_float.cu
- src/distance/distance/specializations/detail/kernels/tanh_kernel_double.cu
- src/distance/distance/specializations/detail/kernels/tanh_kernel_float.cu
- src/distance/distance/specializations/detail/kl_divergence_float_float_float_int.cu
- src/distance/distance/specializations/detail/kl_divergence_double_double_double_int.cu
- src/distance/distance/specializations/detail/l1_float_float_float_int.cu
- src/distance/distance/specializations/detail/l1_double_double_double_int.cu
- src/distance/distance/specializations/detail/l2_expanded_float_float_float_int.cu
- src/distance/distance/specializations/detail/l2_expanded_double_double_double_int.cu
- src/distance/distance/specializations/detail/l2_sqrt_expanded_float_float_float_int.cu
- src/distance/distance/specializations/detail/l2_sqrt_expanded_double_double_double_int.cu
- src/distance/distance/specializations/detail/l2_sqrt_unexpanded_float_float_float_int.cu
- src/distance/distance/specializations/detail/l2_sqrt_unexpanded_double_double_double_int.cu
- src/distance/distance/specializations/detail/l2_unexpanded_double_double_double_int.cu
- src/distance/distance/specializations/detail/l2_unexpanded_float_float_float_int.cu
- src/distance/distance/specializations/detail/l_inf_double_double_double_int.cu
- src/distance/distance/specializations/detail/l_inf_float_float_float_int.cu
- src/distance/distance/specializations/detail/lp_unexpanded_double_double_double_int.cu
- src/distance/distance/specializations/detail/lp_unexpanded_float_float_float_int.cu
- src/distance/distance/specializations/detail/russel_rao_double_double_double_int.cu
- src/distance/distance/specializations/detail/russel_rao_float_float_float_int.cu
- src/distance/distance/specializations/fused_l2_nn_double_int.cu
- src/distance/distance/specializations/fused_l2_nn_double_int64.cu
- src/distance/distance/specializations/fused_l2_nn_float_int.cu
- src/distance/distance/specializations/fused_l2_nn_float_int64.cu
- src/distance/matrix/specializations/detail/select_k_float_uint32_t.cu
- src/distance/matrix/specializations/detail/select_k_float_int64_t.cu
- src/distance/matrix/specializations/detail/select_k_half_uint32_t.cu
- src/distance/matrix/specializations/detail/select_k_half_int64_t.cu
- src/distance/neighbors/ivf_flat_search.cu
- src/distance/neighbors/ivf_flat_build.cu
- src/distance/neighbors/specializations/ivfflat_build_float_int64_t.cu
- src/distance/neighbors/specializations/ivfflat_build_int8_t_int64_t.cu
- src/distance/neighbors/specializations/ivfflat_build_uint8_t_int64_t.cu
- src/distance/neighbors/specializations/ivfflat_extend_float_int64_t.cu
- src/distance/neighbors/specializations/ivfflat_extend_int8_t_int64_t.cu
- src/distance/neighbors/specializations/ivfflat_extend_uint8_t_int64_t.cu
- src/distance/neighbors/specializations/ivfflat_search_float_int64_t.cu
- src/distance/neighbors/specializations/ivfflat_search_int8_t_int64_t.cu
- src/distance/neighbors/specializations/ivfflat_search_uint8_t_int64_t.cu
- src/distance/neighbors/ivfpq_build.cu
- src/distance/neighbors/ivfpq_deserialize.cu
- src/distance/neighbors/ivfpq_serialize.cu
- src/distance/neighbors/ivfpq_search_float_int64_t.cu
- src/distance/neighbors/ivfpq_search_int8_t_int64_t.cu
- src/distance/neighbors/ivfpq_search_uint8_t_int64_t.cu
- src/distance/neighbors/specializations/ivfpq_build_float_int64_t.cu
- src/distance/neighbors/specializations/ivfpq_build_int8_t_int64_t.cu
- src/distance/neighbors/specializations/ivfpq_build_uint8_t_int64_t.cu
- src/distance/neighbors/specializations/ivfpq_extend_float_int64_t.cu
- src/distance/neighbors/specializations/ivfpq_extend_int8_t_int64_t.cu
- src/distance/neighbors/specializations/ivfpq_extend_uint8_t_int64_t.cu
- src/distance/neighbors/specializations/ivfpq_search_float_int64_t.cu
- src/distance/neighbors/specializations/ivfpq_search_int8_t_int64_t.cu
- src/distance/neighbors/specializations/ivfpq_search_uint8_t_int64_t.cu
- src/distance/neighbors/specializations/detail/compute_similarity_float_float_fast.cu
- src/distance/neighbors/specializations/detail/compute_similarity_float_float_no_basediff.cu
- src/distance/neighbors/specializations/detail/compute_similarity_float_float_no_smem_lut.cu
- src/distance/neighbors/specializations/detail/compute_similarity_float_fp8s_fast.cu
- src/distance/neighbors/specializations/detail/compute_similarity_float_fp8s_no_basediff.cu
- src/distance/neighbors/specializations/detail/compute_similarity_float_fp8s_no_smem_lut.cu
- src/distance/neighbors/specializations/detail/compute_similarity_float_fp8u_fast.cu
- src/distance/neighbors/specializations/detail/compute_similarity_float_fp8u_no_basediff.cu
- src/distance/neighbors/specializations/detail/compute_similarity_float_fp8u_no_smem_lut.cu
- src/distance/neighbors/specializations/detail/compute_similarity_float_half_fast.cu
- src/distance/neighbors/specializations/detail/compute_similarity_float_half_no_basediff.cu
- src/distance/neighbors/specializations/detail/compute_similarity_float_half_no_smem_lut.cu
- src/distance/neighbors/specializations/detail/compute_similarity_half_fp8s_fast.cu
- src/distance/neighbors/specializations/detail/compute_similarity_half_fp8s_no_basediff.cu
- src/distance/neighbors/specializations/detail/compute_similarity_half_fp8s_no_smem_lut.cu
- src/distance/neighbors/specializations/detail/compute_similarity_half_fp8u_fast.cu
- src/distance/neighbors/specializations/detail/compute_similarity_half_fp8u_no_basediff.cu
- src/distance/neighbors/specializations/detail/compute_similarity_half_fp8u_no_smem_lut.cu
- src/distance/neighbors/specializations/detail/compute_similarity_half_half_fast.cu
- src/distance/neighbors/specializations/detail/compute_similarity_half_half_no_basediff.cu
- src/distance/neighbors/specializations/detail/compute_similarity_half_half_no_smem_lut.cu
- src/distance/random/rmat_rectangular_generator_int_double.cu
- src/distance/random/rmat_rectangular_generator_int64_double.cu
- src/distance/random/rmat_rectangular_generator_int_float.cu
- src/distance/random/rmat_rectangular_generator_int64_float.cu
+ src/neighbors/brute_force_knn_int64_t_float.cu
+ src/distance/specializations/detail/kernels/tanh_kernel_double.cu
+ src/distance/specializations/detail/kernels/tanh_kernel_float.cu
+ src/distance/specializations/detail/kl_divergence_float_float_float_int.cu
+ src/distance/specializations/detail/kl_divergence_double_double_double_int.cu
+ src/distance/specializations/detail/l1_float_float_float_int.cu
+ src/distance/specializations/detail/l1_double_double_double_int.cu
+ src/distance/specializations/detail/l2_expanded_float_float_float_int.cu
+ src/distance/specializations/detail/l2_expanded_double_double_double_int.cu
+ src/distance/specializations/detail/l2_unexpanded_double_double_double_int.cu
+ src/distance/specializations/detail/l2_unexpanded_float_float_float_int.cu
+ src/distance/specializations/detail/l_inf_double_double_double_int.cu
+ src/distance/specializations/detail/l_inf_float_float_float_int.cu
+ src/distance/specializations/detail/lp_unexpanded_double_double_double_int.cu
+ src/distance/specializations/detail/lp_unexpanded_float_float_float_int.cu
+ src/distance/specializations/detail/russel_rao_double_double_double_int.cu
+ src/distance/specializations/detail/russel_rao_float_float_float_int.cu
+ src/distance/specializations/fused_l2_nn_double_int.cu
+ src/distance/specializations/fused_l2_nn_double_int64.cu
+ src/distance/specializations/fused_l2_nn_float_int.cu
+ src/distance/specializations/fused_l2_nn_float_int64.cu
+ src/matrix/specializations/detail/select_k_float_uint32_t.cu
+ src/matrix/specializations/detail/select_k_float_int64_t.cu
+ src/matrix/specializations/detail/select_k_half_uint32_t.cu
+ src/matrix/specializations/detail/select_k_half_int64_t.cu
+ src/neighbors/ivfpq_build.cu
+ src/neighbors/ivfpq_deserialize.cu
+ src/neighbors/ivfpq_serialize.cu
+ src/neighbors/ivfpq_search_float_int64_t.cu
+ src/neighbors/ivfpq_search_int8_t_int64_t.cu
+ src/neighbors/ivfpq_search_uint8_t_int64_t.cu
+ src/neighbors/specializations/ivfpq_build_float_int64_t.cu
+ src/neighbors/specializations/ivfpq_build_int8_t_int64_t.cu
+ src/neighbors/specializations/ivfpq_build_uint8_t_int64_t.cu
+ src/neighbors/specializations/ivfpq_extend_float_int64_t.cu
+ src/neighbors/specializations/ivfpq_extend_int8_t_int64_t.cu
+ src/neighbors/specializations/ivfpq_extend_uint8_t_int64_t.cu
+ src/neighbors/specializations/ivfpq_search_float_int64_t.cu
+ src/neighbors/specializations/ivfpq_search_int8_t_int64_t.cu
+ src/neighbors/specializations/ivfpq_search_uint8_t_int64_t.cu
+ src/neighbors/specializations/detail/brute_force_knn_impl_long_float_int.cu
+ src/neighbors/specializations/detail/brute_force_knn_impl_long_float_uint.cu
+ src/neighbors/specializations/detail/brute_force_knn_impl_uint_float_int.cu
+ src/neighbors/specializations/detail/brute_force_knn_impl_uint_float_uint.cu
+ src/neighbors/specializations/detail/compute_similarity_float_float_fast.cu
+ src/neighbors/specializations/detail/compute_similarity_float_float_no_basediff.cu
+ src/neighbors/specializations/detail/compute_similarity_float_float_no_smem_lut.cu
+ src/neighbors/specializations/detail/compute_similarity_float_fp8s_fast.cu
+ src/neighbors/specializations/detail/compute_similarity_float_fp8s_no_basediff.cu
+ src/neighbors/specializations/detail/compute_similarity_float_fp8s_no_smem_lut.cu
+ src/neighbors/specializations/detail/compute_similarity_float_fp8u_fast.cu
+ src/neighbors/specializations/detail/compute_similarity_float_fp8u_no_basediff.cu
+ src/neighbors/specializations/detail/compute_similarity_float_fp8u_no_smem_lut.cu
+ src/neighbors/specializations/detail/compute_similarity_float_half_fast.cu
+ src/neighbors/specializations/detail/compute_similarity_float_half_no_basediff.cu
+ src/neighbors/specializations/detail/compute_similarity_float_half_no_smem_lut.cu
+ src/neighbors/specializations/detail/compute_similarity_half_fp8s_fast.cu
+ src/neighbors/specializations/detail/compute_similarity_half_fp8s_no_basediff.cu
+ src/neighbors/specializations/detail/compute_similarity_half_fp8s_no_smem_lut.cu
+ src/neighbors/specializations/detail/compute_similarity_half_fp8u_fast.cu
+ src/neighbors/specializations/detail/compute_similarity_half_fp8u_no_basediff.cu
+ src/neighbors/specializations/detail/compute_similarity_half_fp8u_no_smem_lut.cu
+ src/neighbors/specializations/detail/compute_similarity_half_half_fast.cu
+ src/neighbors/specializations/detail/compute_similarity_half_half_no_basediff.cu
+ src/neighbors/specializations/detail/compute_similarity_half_half_no_smem_lut.cu
+ src/random/rmat_rectangular_generator_int_double.cu
+ src/random/rmat_rectangular_generator_int64_double.cu
+ src/random/rmat_rectangular_generator_int_float.cu
+ src/random/rmat_rectangular_generator_int64_float.cu
+ src/neighbors/specializations/detail/ball_cover_lowdim_pass_one_2d.cu
+ src/neighbors/specializations/detail/ball_cover_lowdim_pass_two_2d.cu
+ src/neighbors/specializations/detail/ball_cover_lowdim_pass_one_3d.cu
+ src/neighbors/specializations/detail/ball_cover_lowdim_pass_two_3d.cu
+ src/neighbors/specializations/ball_cover_all_knn_query.cu
+ src/neighbors/specializations/ball_cover_build_index.cu
+ src/neighbors/specializations/ball_cover_knn_query.cu
+ src/neighbors/specializations/fused_l2_knn_long_float_true.cu
+ src/neighbors/specializations/fused_l2_knn_long_float_false.cu
+ src/neighbors/specializations/fused_l2_knn_int_float_true.cu
+ src/neighbors/specializations/fused_l2_knn_int_float_false.cu
+ src/neighbors/ivf_flat_search.cu
+ src/neighbors/ivf_flat_build.cu
+ src/neighbors/specializations/ivfflat_build_float_int64_t.cu
+ src/neighbors/specializations/ivfflat_build_int8_t_int64_t.cu
+ src/neighbors/specializations/ivfflat_build_uint8_t_int64_t.cu
+ src/neighbors/specializations/ivfflat_extend_float_int64_t.cu
+ src/neighbors/specializations/ivfflat_extend_int8_t_int64_t.cu
+ src/neighbors/specializations/ivfflat_extend_uint8_t_int64_t.cu
+ src/neighbors/specializations/ivfflat_search_float_int64_t.cu
+ src/neighbors/specializations/ivfflat_search_int8_t_int64_t.cu
+ src/neighbors/specializations/ivfflat_search_uint8_t_int64_t.cu
)
set_target_properties(
- raft_distance_lib
- PROPERTIES OUTPUT_NAME raft_distance
+ raft_lib
+ PROPERTIES OUTPUT_NAME raft
BUILD_RPATH "\$ORIGIN"
INSTALL_RPATH "\$ORIGIN"
CXX_STANDARD 17
@@ -433,95 +457,23 @@ if(RAFT_COMPILE_DIST_LIBRARY)
INTERFACE_POSITION_INDEPENDENT_CODE ON
)
- target_link_libraries(
- raft_distance_lib
- PUBLIC raft::raft cuco::cuco
- PRIVATE nvidia::cutlass::cutlass $
- )
+ target_link_libraries(raft_lib PUBLIC raft::raft $)
target_compile_options(
- raft_distance_lib PRIVATE "$<$:${RAFT_CXX_FLAGS}>"
- "$<$:${RAFT_CUDA_FLAGS}>"
- )
- target_compile_definitions(raft_distance_lib INTERFACE "RAFT_DISTANCE_COMPILED")
-
- # ensure CUDA symbols aren't relocated to the middle of the debug build binaries
- target_link_options(raft_distance_lib PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld")
-
-endif()
-
-if(TARGET raft_distance_lib AND (NOT TARGET raft::raft_distance_lib))
- add_library(raft::raft_distance_lib ALIAS raft_distance_lib)
-endif()
-
-target_link_libraries(
- raft_distance INTERFACE raft::raft $
- nvidia::cutlass::cutlass
-)
-
-# ##################################################################################################
-# * raft_nn ------------------------------------------------------------------
-add_library(raft_nn INTERFACE)
-
-if(TARGET raft_nn AND (NOT TARGET raft::nn))
- add_library(raft::nn ALIAS raft_nn)
-endif()
-
-set_target_properties(raft_nn PROPERTIES EXPORT_NAME nn)
-
-if(RAFT_COMPILE_NN_LIBRARY)
- add_library(
- raft_nn_lib
- src/nn/specializations/detail/ball_cover_lowdim_pass_one_2d.cu
- src/nn/specializations/detail/ball_cover_lowdim_pass_two_2d.cu
- src/nn/specializations/detail/ball_cover_lowdim_pass_one_3d.cu
- src/nn/specializations/detail/ball_cover_lowdim_pass_two_3d.cu
- src/nn/specializations/ball_cover_all_knn_query.cu
- src/nn/specializations/ball_cover_build_index.cu
- src/nn/specializations/ball_cover_knn_query.cu
- src/nn/specializations/fused_l2_knn_long_float_true.cu
- src/nn/specializations/fused_l2_knn_long_float_false.cu
- src/nn/specializations/fused_l2_knn_int_float_true.cu
- src/nn/specializations/fused_l2_knn_int_float_false.cu
- src/nn/specializations/brute_force_knn_long_float_int.cu
- src/nn/specializations/brute_force_knn_long_float_uint.cu
- src/nn/specializations/brute_force_knn_uint32_t_float_int.cu
- src/nn/specializations/brute_force_knn_uint32_t_float_uint.cu
- )
- set_target_properties(
- raft_nn_lib
- PROPERTIES OUTPUT_NAME raft_nn
- BUILD_RPATH "\$ORIGIN"
- INSTALL_RPATH "\$ORIGIN"
- CXX_STANDARD 17
- CXX_STANDARD_REQUIRED ON
- CUDA_STANDARD 17
- CUDA_STANDARD_REQUIRED ON
- POSITION_INDEPENDENT_CODE ON
- INTERFACE_POSITION_INDEPENDENT_CODE ON
+ raft_lib PRIVATE "$<$:${RAFT_CXX_FLAGS}>"
+ "$<$:${RAFT_CUDA_FLAGS}>"
)
+ target_compile_definitions(raft_lib INTERFACE "RAFT_COMPILED")
- target_link_libraries(
- raft_nn_lib
- PUBLIC faiss::faiss raft::raft
- PRIVATE nvidia::cutlass::cutlass
- )
- target_compile_options(
- raft_nn_lib PRIVATE "$<$:${RAFT_CXX_FLAGS}>"
- "$<$:${RAFT_CUDA_FLAGS}>"
- )
# ensure CUDA symbols aren't relocated to the middle of the debug build binaries
- target_link_options(raft_nn_lib PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld")
+ target_link_options(raft_lib PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld")
- target_compile_definitions(raft_nn_lib INTERFACE "RAFT_NN_COMPILED")
endif()
-if(TARGET raft_nn_lib AND (NOT TARGET raft::raft_nn_lib))
- add_library(raft::raft_nn_lib ALIAS raft_nn_lib)
+if(TARGET raft_lib AND (NOT TARGET raft::raft_lib))
+ add_library(raft::raft_lib ALIAS raft_lib)
endif()
-target_link_libraries(
- raft_nn INTERFACE raft::raft $ nvidia::cutlass::cutlass
-)
+target_link_libraries(raft_compiled INTERFACE raft::raft $)
# ##################################################################################################
# * raft_distributed -------------------------------------------------------------------------------
@@ -562,39 +514,23 @@ install(
)
install(
- TARGETS raft_distance
- DESTINATION ${lib_dir}
- COMPONENT raft
- EXPORT raft-distance-exports
-)
-
-install(
- TARGETS raft_nn
+ TARGETS raft_compiled
DESTINATION ${lib_dir}
COMPONENT raft
- EXPORT raft-nn-exports
+ EXPORT raft-compiled-exports
)
-if(TARGET raft_distance_lib)
+if(TARGET raft_lib)
install(
- TARGETS raft_distance_lib
+ TARGETS raft_lib
DESTINATION ${lib_dir}
- COMPONENT distance
- EXPORT raft-distance-lib-exports
+ COMPONENT compiled
+ EXPORT raft-compiled-lib-exports
)
install(
DIRECTORY include/raft_runtime
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
- COMPONENT distance
- )
-endif()
-
-if(TARGET raft_nn_lib)
- install(
- TARGETS raft_nn_lib
- DESTINATION ${lib_dir}
- COMPONENT nn
- EXPORT raft-nn-lib-exports
+ COMPONENT compiled
)
endif()
@@ -629,15 +565,11 @@ install(
include("${rapids-cmake-dir}/export/write_dependencies.cmake")
-set(raft_components distance nn distributed)
-set(raft_install_comp raft raft raft)
-if(TARGET raft_distance_lib)
- list(APPEND raft_components distance-lib)
- list(APPEND raft_install_comp distance)
-endif()
-if(TARGET raft_nn_lib)
- list(APPEND raft_components nn-lib)
- list(APPEND raft_install_comp nn)
+set(raft_components compiled distributed)
+set(raft_install_comp raft raft)
+if(TARGET raft_lib)
+ list(APPEND raft_components compiled-lib)
+ list(APPEND raft_install_comp compiled)
endif()
foreach(comp install_comp IN ZIP_LISTS raft_components raft_install_comp)
@@ -673,14 +605,12 @@ RAFT contains fundamental widely-used algorithms and primitives
for data science and machine learning.
Optional Components:
- - nn
- - distance
+ - compiled
- distributed
Imported Targets:
- raft::raft
- - raft::nn brought in by the `nn` optional component
- - raft::distance brought in by the `distance` optional component
+ - raft::compiled brought in by the `compiled` optional component
- raft::distributed brought in by the `distributed` optional component
]=]
@@ -688,34 +618,22 @@ Imported Targets:
set(code_string ${nvtx_export_string})
-if(RAFT_ENABLE_thrust_DEPENDENCY)
- string(
- APPEND
- code_string
- [=[
- if(NOT TARGET raft::Thrust)
- thrust_create_target(raft::Thrust FROM_OPTIONS)
- endif()
- ]=]
- )
-endif()
-
string(
APPEND
code_string
[=[
-if(distance IN_LIST raft_FIND_COMPONENTS)
- enable_language(CUDA)
+if(NOT TARGET raft::Thrust)
+ thrust_create_target(raft::Thrust FROM_OPTIONS)
endif()
+]=]
+)
-if(nn IN_LIST raft_FIND_COMPONENTS)
+string(
+ APPEND
+ code_string
+ [=[
+if(compiled IN_LIST raft_FIND_COMPONENTS)
enable_language(CUDA)
-
- if(TARGET faiss AND (NOT TARGET faiss::faiss))
- add_library(faiss::faiss ALIAS faiss)
- elseif(TARGET faiss::faiss AND (NOT TARGET faiss))
- add_library(faiss ALIAS faiss::faiss)
- endif()
endif()
]=]
)
@@ -723,21 +641,21 @@ endif()
# Use `rapids_export` for 22.04 as it will have COMPONENT support
include(cmake/modules/raft_export.cmake)
raft_export(
- INSTALL raft COMPONENTS nn distance distributed EXPORT_SET raft-exports GLOBAL_TARGETS raft nn
- distance distributed NAMESPACE raft:: DOCUMENTATION doc_string FINAL_CODE_BLOCK code_string
+ INSTALL raft COMPONENTS compiled distributed EXPORT_SET raft-exports GLOBAL_TARGETS raft compiled
+ distributed NAMESPACE raft:: DOCUMENTATION doc_string FINAL_CODE_BLOCK code_string
)
# ##################################################################################################
# * build export -------------------------------------------------------------
raft_export(
- BUILD raft EXPORT_SET raft-exports COMPONENTS nn distance distributed GLOBAL_TARGETS raft
- distance distributed nn DOCUMENTATION doc_string NAMESPACE raft:: FINAL_CODE_BLOCK code_string
+ BUILD raft EXPORT_SET raft-exports COMPONENTS compiled distributed GLOBAL_TARGETS raft compiled
+ distributed DOCUMENTATION doc_string NAMESPACE raft:: FINAL_CODE_BLOCK code_string
)
# ##################################################################################################
# * shared test/bench headers ------------------------------------------------
-if(BUILD_TESTS OR BUILD_BENCH)
+if(BUILD_TESTS OR BUILD_PRIMS_BENCH)
include(internal/CMakeLists.txt)
endif()
@@ -751,6 +669,13 @@ endif()
# ##################################################################################################
# * build benchmark executable -----------------------------------------------
-if(BUILD_BENCH)
- include(bench/CMakeLists.txt)
+if(BUILD_PRIMS_BENCH)
+ include(bench/prims/CMakeLists.txt)
+endif()
+
+# ##################################################################################################
+# * build ann benchmark executable -----------------------------------------------
+
+if(BUILD_ANN_BENCH)
+ include(bench/ann/CMakeLists.txt)
endif()
diff --git a/cpp/bench/CMakeLists.txt b/cpp/bench/CMakeLists.txt
index e2324de654..d92ccba8e3 100644
--- a/cpp/bench/CMakeLists.txt
+++ b/cpp/bench/CMakeLists.txt
@@ -17,7 +17,7 @@
function(ConfigureBench)
- set(options OPTIONAL DIST NN)
+ set(options OPTIONAL LIB)
set(oneValueArgs NAME)
set(multiValueArgs PATH TARGETS CONFIGURATIONS)
@@ -31,8 +31,7 @@ function(ConfigureBench)
${BENCH_NAME}
PRIVATE raft::raft
raft_internal
- $<$:raft::distance>
- $<$:raft::nn>
+ $<$:raft::compiled>
benchmark::benchmark
Threads::Threads
$
@@ -70,7 +69,12 @@ endfunction()
if(BUILD_BENCH)
ConfigureBench(
NAME CLUSTER_BENCH PATH bench/cluster/kmeans_balanced.cu bench/cluster/kmeans.cu bench/main.cpp
- OPTIONAL DIST NN
+ OPTIONAL LIB
+ )
+
+ ConfigureBench(
+ NAME TUNE_DISTANCE PATH bench/distance/tune_pairwise/kernel.cu
+ bench/distance/tune_pairwise/bench.cu bench/main.cpp
)
ConfigureBench(
@@ -86,7 +90,7 @@ if(BUILD_BENCH)
bench/distance/kernels.cu
bench/main.cpp
OPTIONAL
- DIST
+ LIB
)
ConfigureBench(
@@ -106,7 +110,7 @@ if(BUILD_BENCH)
ConfigureBench(
NAME MATRIX_BENCH PATH bench/matrix/argmin.cu bench/matrix/gather.cu bench/matrix/select_k.cu
- bench/main.cpp OPTIONAL DIST
+ bench/main.cpp OPTIONAL LIB
)
ConfigureBench(
@@ -132,7 +136,6 @@ if(BUILD_BENCH)
bench/neighbors/refine_uint8_t_int64_t.cu
bench/main.cpp
OPTIONAL
- DIST
- NN
+ LIB
)
endif()
diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt
new file mode 100644
index 0000000000..6267be518e
--- /dev/null
+++ b/cpp/bench/ann/CMakeLists.txt
@@ -0,0 +1,160 @@
+# =============================================================================
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+# =============================================================================
+
+# ##################################################################################################
+# * compiler function -----------------------------------------------------------------------------
+
+# Per-algorithm switches. Each option decides whether the corresponding algorithm is part of the
+# ANN benchmark build.
+option(RAFT_ANN_BENCH_USE_FAISS_BFKNN "Include faiss' brute-force knn algorithm in benchmark" ON)
+option(RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT "Include faiss' ivf flat algorithm in benchmark" ON)
+option(RAFT_ANN_BENCH_USE_FAISS_IVF_PQ "Include faiss' ivf pq algorithm in benchmark" ON)
+option(RAFT_ANN_BENCH_USE_RAFT_BFKNN "Include raft's brute-force knn algorithm in benchmark" ON)
+option(RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT "Include raft's ivf flat algorithm in benchmark" ON)
+option(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ "Include raft's ivf pq algorithm in benchmark" ON)
+option(RAFT_ANN_BENCH_USE_HNSWLIB "Include hnsw algorithm in benchmark" ON)
+option(RAFT_ANN_BENCH_USE_GGNN "Include ggnn algorithm in benchmark" ON)
+
+find_package(Threads REQUIRED)
+
+# Umbrella switch for faiss: ON as soon as any faiss algorithm is enabled. The names tested here
+# must match the option() names above exactly, because if() treats an undefined variable as false
+# and a misspelled name would silently drop that algorithm from the check.
+set(RAFT_ANN_BENCH_USE_FAISS OFF)
+if(RAFT_ANN_BENCH_USE_FAISS_BFKNN
+   OR RAFT_ANN_BENCH_USE_FAISS_IVF_PQ
+   OR RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT
+)
+  set(RAFT_ANN_BENCH_USE_FAISS ON)
+endif()
+
+# Umbrella switch for raft: ON as soon as any raft algorithm is enabled (same naming rule as above).
+set(RAFT_ANN_BENCH_USE_RAFT OFF)
+if(RAFT_ANN_BENCH_USE_RAFT_BFKNN
+   OR RAFT_ANN_BENCH_USE_RAFT_IVF_PQ
+   OR RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT
+)
+  set(RAFT_ANN_BENCH_USE_RAFT ON)
+endif()
+
+# Third-party dependencies are only pulled in for the algorithms that are actually enabled.
+if(RAFT_ANN_BENCH_USE_HNSWLIB)
+  include(cmake/thirdparty/get_hnswlib.cmake)
+endif()
+
+option(RAFT_ANN_BENCH_USE_MULTIGPU "Use multi-gpus (where possible) in benchmarks" OFF)
+
+# nlohmann_json is included unconditionally; every benchmark target links against it.
+include(cmake/thirdparty/get_nlohmann_json.cmake)
+
+if(RAFT_ANN_BENCH_USE_GGNN)
+  include(cmake/thirdparty/get_ggnn.cmake)
+endif()
+
+if(RAFT_ANN_BENCH_USE_FAISS)
+  include(cmake/thirdparty/get_faiss.cmake)
+endif()
+
+# ConfigureAnnBench: define one ANN benchmark executable.
+#
+# Keyword arguments:
+#   NAME     (one value)   algorithm tag; the target is named <NAME>_ANN_BENCH
+#   PATH     (multi value) source files of the benchmark
+#   LINKS    (multi value) extra libraries to link privately
+#   CXXFLAGS (multi value) extra C++ compile flags, appended after RAFT_CXX_FLAGS
+#   INCLUDES (multi value) extra private include directories
+#
+# The shared sources bench/ann/src/common/conf.cpp and util.cpp are always compiled in. The target
+# is installed to bin/ann as part of the ann_bench component and is excluded from the default
+# install.
+#
+# NOTE(review): several generator expressions below ("$<$:...>" and the bare "$" entries) look
+# truncated in this copy of the patch; confirm them against the upstream file before applying.
+function(ConfigureAnnBench)
+
+  # No boolean options are supported; set the list explicitly so that a stray `options` variable
+  # from the calling scope cannot leak into the argument parsing.
+  set(options)
+  set(oneValueArgs NAME)
+  set(multiValueArgs PATH LINKS CXXFLAGS INCLUDES)
+
+  cmake_parse_arguments(
+    ConfigureAnnBench "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}
+  )
+
+  set(BENCH_NAME ${ConfigureAnnBench_NAME}_ANN_BENCH)
+
+  add_executable(
+    ${BENCH_NAME} ${ConfigureAnnBench_PATH} bench/ann/src/common/conf.cpp
+                  bench/ann/src/common/util.cpp
+  )
+  target_link_libraries(
+    ${BENCH_NAME}
+    PRIVATE raft::raft
+            nlohmann_json::nlohmann_json
+            $<$:NCCL::NCCL>
+            ${ConfigureAnnBench_LINKS}
+            Threads::Threads
+            $
+            $
+  )
+
+  set_target_properties(
+    ${BENCH_NAME}
+    PROPERTIES # set target compile options
+               INSTALL_RPATH "\$ORIGIN/../../../lib"
+               CXX_STANDARD 17
+               CXX_STANDARD_REQUIRED ON
+               CUDA_STANDARD 17
+               CUDA_STANDARD_REQUIRED ON
+               POSITION_INDEPENDENT_CODE ON
+               INTERFACE_POSITION_INDEPENDENT_CODE ON
+  )
+
+  # Prepend the project-wide C++ flags to the caller-supplied ones. The variable is named
+  # literally here: writing set(${ConfigureAnnBench_CXXFLAGS} ...) would assign to a variable whose
+  # name is the flag value and leave ConfigureAnnBench_CXXFLAGS unchanged.
+  set(ConfigureAnnBench_CXXFLAGS ${RAFT_CXX_FLAGS} ${ConfigureAnnBench_CXXFLAGS})
+
+  target_compile_options(
+    ${BENCH_NAME} PRIVATE "$<$:${ConfigureAnnBench_CXXFLAGS}>"
+                          "$<$:${RAFT_CUDA_FLAGS}>"
+  )
+
+  # Expose the per-algorithm switch to the sources as a preprocessor symbol of the same name, but
+  # only when the matching CMake variable is true.
+  if(RAFT_ANN_BENCH_USE_${ConfigureAnnBench_NAME})
+    target_compile_definitions(
+      ${BENCH_NAME}
+      PUBLIC
+        RAFT_ANN_BENCH_USE_${ConfigureAnnBench_NAME}=RAFT_ANN_BENCH_USE_${ConfigureAnnBench_NAME}
+    )
+  endif()
+
+  target_include_directories(
+    ${BENCH_NAME}
+    PUBLIC "$"
+    PRIVATE ${ConfigureAnnBench_INCLUDES}
+  )
+
+  install(
+    TARGETS ${BENCH_NAME}
+    COMPONENT ann_bench
+    DESTINATION bin/ann
+    EXCLUDE_FROM_ALL
+  )
+endfunction()
+
+# hnswlib benchmark: headers come from the hnswlib checkout under _deps in the build directory.
+if(RAFT_ANN_BENCH_USE_HNSWLIB)
+  ConfigureAnnBench(
+    NAME
+    HNSWLIB
+    PATH
+    bench/ann/src/hnswlib/hnswlib_benchmark.cpp
+    INCLUDES
+    ${CMAKE_CURRENT_BINARY_DIR}/_deps/hnswlib-src/hnswlib
+    CXXFLAGS
+    "${HNSW_CXX_FLAGS}"
+  )
+endif()
+
+# raft benchmark: a single executable that links the precompiled raft library.
+if(RAFT_ANN_BENCH_USE_RAFT)
+  ConfigureAnnBench(
+    NAME RAFT_IVF_PQ
+    PATH bench/ann/src/raft/raft_benchmark.cu
+         $<$:bench/ann/src/raft/raft_ivf_pq.cu>
+         $<$:bench/ann/src/raft/raft_ivf_flat.cu>
+    LINKS raft::compiled
+  )
+endif()
+
+# faiss benchmark: a single executable linked against faiss.
+if(RAFT_ANN_BENCH_USE_FAISS)
+  ConfigureAnnBench(
+    NAME
+    FAISS_IVF_FLAT
+    PATH
+    bench/ann/src/faiss/faiss_benchmark.cu
+    LINKS
+    faiss::faiss
+  )
+endif()
+
+# ggnn benchmark: pulls in glog first, then builds against the ggnn headers under _deps.
+if(RAFT_ANN_BENCH_USE_GGNN)
+  include(cmake/thirdparty/get_glog.cmake)
+  ConfigureAnnBench(
+    NAME
+    GGNN
+    PATH
+    bench/ann/src/ggnn/ggnn_benchmark.cu
+    INCLUDES
+    ${CMAKE_CURRENT_BINARY_DIR}/_deps/ggnn-src/include
+    LINKS
+    glog::glog
+  )
+endif()
diff --git a/cpp/bench/ann/README.md b/cpp/bench/ann/README.md
new file mode 100644
index 0000000000..1a8af2e448
--- /dev/null
+++ b/cpp/bench/ann/README.md
@@ -0,0 +1,3 @@
+# RAFT CUDA ANN Benchmarks
+
+Please see the [ANN Benchmarks](https://docs.rapids.ai/api/raft/stable/cuda_ann_benchmarks.html) section of the RAFT documentation for instructions on building and using the ANN benchmarks.
\ No newline at end of file
diff --git a/cpp/bench/ann/conf/bigann-100M.json b/cpp/bench/ann/conf/bigann-100M.json
new file mode 100644
index 0000000000..5f16f3378d
--- /dev/null
+++ b/cpp/bench/ann/conf/bigann-100M.json
@@ -0,0 +1,174 @@
+{
+ "dataset" : {
+ "name" : "bigann-100M",
+ "base_file" : "data/bigann-1B/base.1B.u8bin",
+ "subset_size" : 100000000,
+ "query_file" : "data/bigann-1B/query.public.10K.u8bin",
+ "distance" : "euclidean"
+ },
+
+ "search_basic_param" : {
+ "batch_size" : 10000,
+ "k" : 10,
+ "run_count" : 2
+ },
+
+ "index" : [
+ {
+ "name": "raft_ivf_pq.dimpq64-cluster5K-float-float",
+ "algo": "raft_ivf_pq",
+ "build_param": {
+ "niter": 25,
+ "nlist": 5000,
+ "pq_dim": 64,
+ "ratio": 10
+ },
+ "file": "index/bigann-100M/raft_ivf_pq/dimpq64-cluster5K",
+ "search_params": [
+ {
+ "numProbes": 20,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "float"
+ },
+ {
+ "numProbes": 30,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "float"
+ },
+ {
+ "numProbes": 40,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "float"
+ },
+ {
+ "numProbes": 50,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "float"
+ },
+ {
+ "numProbes": 100,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "float"
+ },
+ {
+ "numProbes": 200,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "float"
+ },
+ {
+ "numProbes": 500,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "float"
+ },
+ {
+ "numProbes": 1000,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "float"
+ }
+ ],
+ "search_result_file": "result/bigann-100M/raft_ivf_pq/dimpq64-cluster5K-float-float"
+ },
+ {
+ "name" : "hnswlib.M12",
+ "algo" : "hnswlib",
+ "build_param": {"M":12, "efConstruction":500, "numThreads":32},
+ "file" : "index/bigann-100M/hnswlib/M12",
+ "search_params" : [
+ {"ef":10, "numThreads":1},
+ {"ef":20, "numThreads":1},
+ {"ef":40, "numThreads":1},
+ {"ef":60, "numThreads":1},
+ {"ef":80, "numThreads":1},
+ {"ef":120, "numThreads":1},
+ {"ef":200, "numThreads":1},
+ {"ef":400, "numThreads":1},
+ {"ef":600, "numThreads":1},
+ {"ef":800, "numThreads":1}
+ ],
+ "search_result_file" : "result/bigann-100M/hnswlib/M12"
+ },
+ {
+ "name" : "hnswlib.M16",
+ "algo" : "hnswlib",
+ "build_param": {"M":16, "efConstruction":500, "numThreads":32},
+ "file" : "index/bigann-100M/hnswlib/M16",
+ "search_params" : [
+ {"ef":10, "numThreads":1},
+ {"ef":20, "numThreads":1},
+ {"ef":40, "numThreads":1},
+ {"ef":60, "numThreads":1},
+ {"ef":80, "numThreads":1},
+ {"ef":120, "numThreads":1},
+ {"ef":200, "numThreads":1},
+ {"ef":400, "numThreads":1},
+ {"ef":600, "numThreads":1},
+ {"ef":800, "numThreads":1}
+ ],
+ "search_result_file" : "result/bigann-100M/hnswlib/M16"
+ },
+ {
+ "name" : "hnswlib.M24",
+ "algo" : "hnswlib",
+ "build_param": {"M":24, "efConstruction":500, "numThreads":32},
+ "file" : "index/bigann-100M/hnswlib/M24",
+ "search_params" : [
+ {"ef":10, "numThreads":1},
+ {"ef":20, "numThreads":1},
+ {"ef":40, "numThreads":1},
+ {"ef":60, "numThreads":1},
+ {"ef":80, "numThreads":1},
+ {"ef":120, "numThreads":1},
+ {"ef":200, "numThreads":1},
+ {"ef":400, "numThreads":1},
+ {"ef":600, "numThreads":1},
+ {"ef":800, "numThreads":1}
+ ],
+ "search_result_file" : "result/bigann-100M/hnswlib/M24"
+ },
+ {
+ "name" : "hnswlib.M36",
+ "algo" : "hnswlib",
+ "build_param": {"M":36, "efConstruction":500, "numThreads":32},
+ "file" : "index/bigann-100M/hnswlib/M36",
+ "search_params" : [
+ {"ef":10, "numThreads":1},
+ {"ef":20, "numThreads":1},
+ {"ef":40, "numThreads":1},
+ {"ef":60, "numThreads":1},
+ {"ef":80, "numThreads":1},
+ {"ef":120, "numThreads":1},
+ {"ef":200, "numThreads":1},
+ {"ef":400, "numThreads":1},
+ {"ef":600, "numThreads":1},
+ {"ef":800, "numThreads":1}
+ ],
+ "search_result_file" : "result/bigann-100M/hnswlib/M36"
+ },
+
+
+ {
+ "name" : "ivf_flat.nlist100K",
+ "algo" : "ivf_flat",
+ "build_param": {
+ "nlist" : 100000,
+ "niter" : 25,
+ "ratio" : 5
+ },
+ "file" : "index/bigann-100M/ivf_flat/nlist100K",
+ "search_params" : [
+ {"max_batch":10000, "max_k":10, "nprobe":20},
+ {"max_batch":10000, "max_k":10, "nprobe":30},
+ {"max_batch":10000, "max_k":10, "nprobe":40},
+ {"max_batch":10000, "max_k":10, "nprobe":50},
+ {"max_batch":10000, "max_k":10, "nprobe":100},
+ {"max_batch":10000, "max_k":10, "nprobe":200},
+ {"max_batch":10000, "max_k":10, "nprobe":500},
+ {"max_batch":10000, "max_k":10, "nprobe":1000}
+ ],
+ "search_result_file" : "result/bigann-100M/ivf_flat/nlist100K"
+ }
+
+
+
+ ]
+}
diff --git a/cpp/bench/ann/conf/deep-100M.json b/cpp/bench/ann/conf/deep-100M.json
new file mode 100644
index 0000000000..b3a945d50e
--- /dev/null
+++ b/cpp/bench/ann/conf/deep-100M.json
@@ -0,0 +1,223 @@
+{
+ "dataset" : {
+ "name" : "deep-100M",
+ "base_file" : "data/deep-1B/base.1B.fbin",
+ "subset_size" : 100000000,
+ "query_file" : "data/deep-1B/query.public.10K.fbin",
+ "distance" : "euclidean"
+ },
+
+ "search_basic_param" : {
+ "batch_size" : 10000,
+ "k" : 10,
+ "run_count" : 2
+ },
+
+ "index" : [
+ {
+ "name" : "hnswlib.M12",
+ "algo" : "hnswlib",
+ "build_param": {"M":12, "efConstruction":500, "numThreads":32},
+ "file" : "index/deep-100M/hnswlib/M12",
+ "search_params" : [
+ {"ef":10, "numThreads":1},
+ {"ef":20, "numThreads":1},
+ {"ef":40, "numThreads":1},
+ {"ef":60, "numThreads":1},
+ {"ef":80, "numThreads":1},
+ {"ef":120, "numThreads":1},
+ {"ef":200, "numThreads":1},
+ {"ef":400, "numThreads":1},
+ {"ef":600, "numThreads":1},
+ {"ef":800, "numThreads":1}
+ ],
+ "search_result_file" : "result/deep-100M/hnswlib/M12"
+ },
+ {
+ "name" : "hnswlib.M16",
+ "algo" : "hnswlib",
+ "build_param": {"M":16, "efConstruction":500, "numThreads":32},
+ "file" : "index/deep-100M/hnswlib/M16",
+ "search_params" : [
+ {"ef":10, "numThreads":1},
+ {"ef":20, "numThreads":1},
+ {"ef":40, "numThreads":1},
+ {"ef":60, "numThreads":1},
+ {"ef":80, "numThreads":1},
+ {"ef":120, "numThreads":1},
+ {"ef":200, "numThreads":1},
+ {"ef":400, "numThreads":1},
+ {"ef":600, "numThreads":1},
+ {"ef":800, "numThreads":1}
+ ],
+ "search_result_file" : "result/deep-100M/hnswlib/M16"
+ },
+ {
+ "name" : "hnswlib.M24",
+ "algo" : "hnswlib",
+ "build_param": {"M":24, "efConstruction":500, "numThreads":32},
+ "file" : "index/deep-100M/hnswlib/M24",
+ "search_params" : [
+ {"ef":10, "numThreads":1},
+ {"ef":20, "numThreads":1},
+ {"ef":40, "numThreads":1},
+ {"ef":60, "numThreads":1},
+ {"ef":80, "numThreads":1},
+ {"ef":120, "numThreads":1},
+ {"ef":200, "numThreads":1},
+ {"ef":400, "numThreads":1},
+ {"ef":600, "numThreads":1},
+ {"ef":800, "numThreads":1}
+ ],
+ "search_result_file" : "result/deep-100M/hnswlib/M24"
+ },
+ {
+ "name" : "hnswlib.M36",
+ "algo" : "hnswlib",
+ "build_param": {"M":36, "efConstruction":500, "numThreads":32},
+ "file" : "index/deep-100M/hnswlib/M36",
+ "search_params" : [
+ {"ef":10, "numThreads":1},
+ {"ef":20, "numThreads":1},
+ {"ef":40, "numThreads":1},
+ {"ef":60, "numThreads":1},
+ {"ef":80, "numThreads":1},
+ {"ef":120, "numThreads":1},
+ {"ef":200, "numThreads":1},
+ {"ef":400, "numThreads":1},
+ {"ef":600, "numThreads":1},
+ {"ef":800, "numThreads":1}
+ ],
+ "search_result_file" : "result/deep-100M/hnswlib/M36"
+ },
+ {
+ "name" : "faiss_ivf_flat.nlist50K",
+ "algo" : "faiss_gpu_ivf_flat",
+ "build_param": {"nlist":50000},
+ "file" : "index/deep-100M/faiss_ivf_flat/nlist50K",
+ "search_params" : [
+ {"nprobe":20},
+ {"nprobe":30},
+ {"nprobe":40},
+ {"nprobe":50},
+ {"nprobe":100},
+ {"nprobe":200},
+ {"nprobe":500},
+ {"nprobe":1000}
+ ],
+ "search_result_file" : "result/deep-100M/faiss_ivf_flat/nlist50K"
+ },
+ {
+ "name" : "faiss_ivf_flat.nlist100K",
+ "algo" : "faiss_gpu_ivf_flat",
+ "build_param": {"nlist":100000},
+ "file" : "index/deep-100M/faiss_ivf_flat/nlist100K",
+ "search_params" : [
+ {"nprobe":20},
+ {"nprobe":30},
+ {"nprobe":40},
+ {"nprobe":50},
+ {"nprobe":100},
+ {"nprobe":200},
+ {"nprobe":500},
+ {"nprobe":1000}
+ ],
+ "search_result_file" : "result/deep-100M/faiss_ivf_flat/nlist100K"
+ },
+ {
+ "name" : "faiss_ivf_flat.nlist200K",
+ "algo" : "faiss_gpu_ivf_flat",
+ "build_param": {"nlist":200000},
+ "file" : "index/deep-100M/faiss_ivf_flat/nlist200K",
+ "search_params" : [
+ {"nprobe":20},
+ {"nprobe":30},
+ {"nprobe":40},
+ {"nprobe":50},
+ {"nprobe":100},
+ {"nprobe":200},
+ {"nprobe":500},
+ {"nprobe":1000}
+ ],
+ "search_result_file" : "result/deep-100M/faiss_ivf_flat/nlist200K"
+ },
+
+
+ {
+ "name" : "faiss_ivf_pq.M48-nlist16K",
+ "algo" : "faiss_gpu_ivf_pq",
+ "build_param": {"nlist":16384, "M":48},
+ "file" : "index/deep-100M/faiss_ivf_pq/M48-nlist16K",
+ "search_params" : [
+ {"nprobe":10},
+ {"nprobe":20},
+ {"nprobe":30},
+ {"nprobe":40},
+ {"nprobe":50},
+ {"nprobe":100},
+ {"nprobe":200},
+ {"nprobe":500}
+ ],
+ "search_result_file" : "result/deep-100M/faiss_ivf_pq/M48-nlist16K"
+ },
+ {
+ "name" : "faiss_ivf_pq.M48-nlist50K",
+ "algo" : "faiss_gpu_ivf_pq",
+ "build_param": {"nlist":50000, "M":48},
+ "file" : "index/deep-100M/faiss_ivf_pq/M48-nlist50K",
+ "search_params" : [
+ {"nprobe":20},
+ {"nprobe":30},
+ {"nprobe":40},
+ {"nprobe":50},
+ {"nprobe":100},
+ {"nprobe":200},
+ {"nprobe":500},
+ {"nprobe":1000}
+ ],
+ "search_result_file" : "result/deep-100M/faiss_ivf_pq/M48-nlist50K"
+ },
+ {
+ "name" : "faiss_ivf_pq.M48-nlist100K",
+ "algo" : "faiss_gpu_ivf_pq",
+ "build_param": {"nlist":100000, "M":48},
+ "file" : "index/deep-100M/faiss_ivf_pq/M48-nlist100K",
+ "search_params" : [
+ {"nprobe":20},
+ {"nprobe":30},
+ {"nprobe":40},
+ {"nprobe":50},
+ {"nprobe":100},
+ {"nprobe":200},
+ {"nprobe":500},
+ {"nprobe":1000}
+ ],
+ "search_result_file" : "result/deep-100M/faiss_ivf_pq/M48-nlist100K"
+ },
+
+
+ {
+ "name" : "ivf_flat.nlist100K",
+ "algo" : "ivf_flat",
+ "build_param": {
+ "nlist" : 100000,
+ "niter" : 25,
+ "ratio" : 5
+ },
+ "file" : "index/deep-100M/ivf_flat/nlist100K",
+ "search_params" : [
+ {"max_batch":10000, "max_k":10, "nprobe":20},
+ {"max_batch":10000, "max_k":10, "nprobe":30},
+ {"max_batch":10000, "max_k":10, "nprobe":40},
+ {"max_batch":10000, "max_k":10, "nprobe":50},
+ {"max_batch":10000, "max_k":10, "nprobe":100},
+ {"max_batch":10000, "max_k":10, "nprobe":200},
+ {"max_batch":10000, "max_k":10, "nprobe":500},
+ {"max_batch":10000, "max_k":10, "nprobe":1000}
+ ],
+ "search_result_file" : "result/deep-100M/ivf_flat/nlist100K"
+ }
+
+
+ ]
+}
diff --git a/cpp/bench/ann/conf/deep-1B.json b/cpp/bench/ann/conf/deep-1B.json
new file mode 100644
index 0000000000..50d1b87602
--- /dev/null
+++ b/cpp/bench/ann/conf/deep-1B.json
@@ -0,0 +1,38 @@
+{
+ "dataset" : {
+ "name" : "deep-1B",
+ "base_file" : "data/deep-1B/base.1B.fbin",
+ "query_file" : "data/deep-1B/query.public.10K.fbin",
+ // although distance should be "euclidean", faiss becomes much slower for that
+ "distance" : "inner_product"
+ },
+
+ "search_basic_param" : {
+ "batch_size" : 10000,
+ "k" : 10,
+ "run_count" : 2
+ },
+
+ "index" : [
+ {
+ "name" : "faiss_ivf_pq.M48-nlist50K",
+ "algo" : "faiss_gpu_ivf_pq",
+ "build_param": {"nlist":50000, "M":48},
+ "file" : "index/deep-1B/faiss_ivf_pq/M48-nlist50K",
+ "search_params" : [
+ {"nprobe":1},
+ {"nprobe":5},
+ {"nprobe":10},
+ {"nprobe":50},
+ {"nprobe":100},
+ {"nprobe":200},
+ {"nprobe":500},
+ {"nprobe":1000},
+ {"nprobe":2000}
+ ],
+ "search_result_file" : "result/deep-1B/faiss_ivf_pq/M48-nlist50K"
+ }
+
+
+ ]
+}
diff --git a/cpp/bench/ann/conf/glove-100-inner.json b/cpp/bench/ann/conf/glove-100-inner.json
new file mode 100644
index 0000000000..d210aca654
--- /dev/null
+++ b/cpp/bench/ann/conf/glove-100-inner.json
@@ -0,0 +1,797 @@
+{
+ "dataset" : {
+ "name" : "glove-100-inner",
+ "base_file" : "data/glove-100-inner/base.fbin",
+ "query_file" : "data/glove-100-inner/query.fbin",
+ "distance" : "inner_product"
+ },
+
+ "search_basic_param" : {
+ "batch_size" : 1,
+ "k" : 10,
+ "run_count" : 3
+ },
+
+ "index" : [
+ {
+ "name" : "hnswlib.M4",
+ "algo" : "hnswlib",
+ "build_param": {"M":4, "efConstruction":500, "numThreads":4},
+ "file" : "index/glove-100-inner/hnswlib/M4",
+ "search_params" : [
+ {"ef":10, "numThreads":1},
+ {"ef":20, "numThreads":1},
+ {"ef":40, "numThreads":1},
+ {"ef":80, "numThreads":1},
+ {"ef":120, "numThreads":1},
+ {"ef":200, "numThreads":1},
+ {"ef":400, "numThreads":1},
+ {"ef":600, "numThreads":1},
+ {"ef":800, "numThreads":1}
+ ],
+ "search_result_file" : "result/glove-100-inner/hnswlib/M4"
+ },
+
+ {
+ "name" : "hnswlib.M8",
+ "algo" : "hnswlib",
+ "build_param": {"M":8, "efConstruction":500, "numThreads":4},
+ "file" : "index/glove-100-inner/hnswlib/M8",
+ "search_params" : [
+ {"ef":10, "numThreads":1},
+ {"ef":20, "numThreads":1},
+ {"ef":40, "numThreads":1},
+ {"ef":80, "numThreads":1},
+ {"ef":120, "numThreads":1},
+ {"ef":200, "numThreads":1},
+ {"ef":400, "numThreads":1},
+ {"ef":600, "numThreads":1},
+ {"ef":800, "numThreads":1}
+ ],
+ "search_result_file" : "result/glove-100-inner/hnswlib/M8"
+ },
+
+ {
+ "name" : "hnswlib.M12",
+ "algo" : "hnswlib",
+ "build_param": {"M":12, "efConstruction":500, "numThreads":4},
+ "file" : "index/glove-100-inner/hnswlib/M12",
+ "search_params" : [
+ {"ef":10, "numThreads":1},
+ {"ef":20, "numThreads":1},
+ {"ef":40, "numThreads":1},
+ {"ef":80, "numThreads":1},
+ {"ef":120, "numThreads":1},
+ {"ef":200, "numThreads":1},
+ {"ef":400, "numThreads":1},
+ {"ef":600, "numThreads":1},
+ {"ef":800, "numThreads":1}
+ ],
+ "search_result_file" : "result/glove-100-inner/hnswlib/M12"
+ },
+
+ {
+ "name" : "hnswlib.M16",
+ "algo" : "hnswlib",
+ "build_param": {"M":16, "efConstruction":500, "numThreads":4},
+ "file" : "index/glove-100-inner/hnswlib/M16",
+ "search_params" : [
+ {"ef":10, "numThreads":1},
+ {"ef":20, "numThreads":1},
+ {"ef":40, "numThreads":1},
+ {"ef":80, "numThreads":1},
+ {"ef":120, "numThreads":1},
+ {"ef":200, "numThreads":1},
+ {"ef":400, "numThreads":1},
+ {"ef":600, "numThreads":1},
+ {"ef":800, "numThreads":1}
+ ],
+ "search_result_file" : "result/glove-100-inner/hnswlib/M16"
+ },
+
+ {
+ "name" : "hnswlib.M24",
+ "algo" : "hnswlib",
+ "build_param": {"M":24, "efConstruction":500, "numThreads":4},
+ "file" : "index/glove-100-inner/hnswlib/M24",
+ "search_params" : [
+ {"ef":10, "numThreads":1},
+ {"ef":20, "numThreads":1},
+ {"ef":40, "numThreads":1},
+ {"ef":80, "numThreads":1},
+ {"ef":120, "numThreads":1},
+ {"ef":200, "numThreads":1},
+ {"ef":400, "numThreads":1},
+ {"ef":600, "numThreads":1},
+ {"ef":800, "numThreads":1}
+ ],
+ "search_result_file" : "result/glove-100-inner/hnswlib/M24"
+ },
+
+ {
+ "name" : "hnswlib.M36",
+ "algo" : "hnswlib",
+ "build_param": {"M":36, "efConstruction":500, "numThreads":4},
+ "file" : "index/glove-100-inner/hnswlib/M36",
+ "search_params" : [
+ {"ef":10, "numThreads":1},
+ {"ef":20, "numThreads":1},
+ {"ef":40, "numThreads":1},
+ {"ef":80, "numThreads":1},
+ {"ef":120, "numThreads":1},
+ {"ef":200, "numThreads":1},
+ {"ef":400, "numThreads":1},
+ {"ef":600, "numThreads":1},
+ {"ef":800, "numThreads":1}
+ ],
+ "search_result_file" : "result/glove-100-inner/hnswlib/M36"
+ },
+
+ {
+ "name" : "hnswlib.M48",
+ "algo" : "hnswlib",
+ "build_param": {"M":48, "efConstruction":500, "numThreads":4},
+ "file" : "index/glove-100-inner/hnswlib/M48",
+ "search_params" : [
+ {"ef":10, "numThreads":1},
+ {"ef":20, "numThreads":1},
+ {"ef":40, "numThreads":1},
+ {"ef":80, "numThreads":1},
+ {"ef":120, "numThreads":1},
+ {"ef":200, "numThreads":1},
+ {"ef":400, "numThreads":1},
+ {"ef":600, "numThreads":1},
+ {"ef":800, "numThreads":1}
+ ],
+ "search_result_file" : "result/glove-100-inner/hnswlib/M48"
+ },
+
+ {
+ "name" : "hnswlib.M64",
+ "algo" : "hnswlib",
+ "build_param": {"M":64, "efConstruction":500, "numThreads":4},
+ "file" : "index/glove-100-inner/hnswlib/M64",
+ "search_params" : [
+ {"ef":10, "numThreads":1},
+ {"ef":20, "numThreads":1},
+ {"ef":40, "numThreads":1},
+ {"ef":80, "numThreads":1},
+ {"ef":120, "numThreads":1},
+ {"ef":200, "numThreads":1},
+ {"ef":400, "numThreads":1},
+ {"ef":600, "numThreads":1},
+ {"ef":800, "numThreads":1}
+ ],
+ "search_result_file" : "result/glove-100-inner/hnswlib/M64"
+ },
+
+ {
+ "name" : "hnswlib.M96",
+ "algo" : "hnswlib",
+ "build_param": {"M":96, "efConstruction":500, "numThreads":4},
+ "file" : "index/glove-100-inner/hnswlib/M96",
+ "search_params" : [
+ {"ef":10, "numThreads":1},
+ {"ef":20, "numThreads":1},
+ {"ef":40, "numThreads":1},
+ {"ef":80, "numThreads":1},
+ {"ef":120, "numThreads":1},
+ {"ef":200, "numThreads":1},
+ {"ef":400, "numThreads":1},
+ {"ef":600, "numThreads":1},
+ {"ef":800, "numThreads":1}
+ ],
+ "search_result_file" : "result/glove-100-inner/hnswlib/M96"
+ },
+
+ {
+ "name" : "faiss_ivf_flat.nlist1024",
+ "algo" : "faiss_gpu_ivf_flat",
+ "build_param": {"nlist":1024},
+ "file" : "index/glove-100-inner/faiss_ivf_flat/nlist1024",
+ "search_params" : [
+ {"nprobe":1},
+ {"nprobe":5},
+ {"nprobe":10},
+ {"nprobe":50},
+ {"nprobe":100},
+ {"nprobe":200},
+ {"nprobe":500},
+ {"nprobe":1000}
+ ],
+ "search_result_file" : "result/glove-100-inner/faiss_ivf_flat/nlist1024"
+ },
+
+ {
+ "name" : "faiss_ivf_flat.nlist2048",
+ "algo" : "faiss_gpu_ivf_flat",
+ "build_param": {"nlist":2048},
+ "file" : "index/glove-100-inner/faiss_ivf_flat/nlist2048",
+ "search_params" : [
+ {"nprobe":1},
+ {"nprobe":5},
+ {"nprobe":10},
+ {"nprobe":50},
+ {"nprobe":100},
+ {"nprobe":200},
+ {"nprobe":500},
+ {"nprobe":1000}
+ ],
+ "search_result_file" : "result/glove-100-inner/faiss_ivf_flat/nlist2048"
+ },
+
+ {
+ "name" : "faiss_ivf_flat.nlist4096",
+ "algo" : "faiss_gpu_ivf_flat",
+ "build_param": {"nlist":4096},
+ "file" : "index/glove-100-inner/faiss_ivf_flat/nlist4096",
+ "search_params" : [
+ {"nprobe":1},
+ {"nprobe":5},
+ {"nprobe":10},
+ {"nprobe":50},
+ {"nprobe":100},
+ {"nprobe":200},
+ {"nprobe":500},
+ {"nprobe":1000}
+ ],
+ "search_result_file" : "result/glove-100-inner/faiss_ivf_flat/nlist4096"
+ },
+
+ {
+ "name" : "faiss_ivf_flat.nlist8192",
+ "algo" : "faiss_gpu_ivf_flat",
+ "build_param": {"nlist":8192},
+ "file" : "index/glove-100-inner/faiss_ivf_flat/nlist8192",
+ "search_params" : [
+ {"nprobe":1},
+ {"nprobe":5},
+ {"nprobe":10},
+ {"nprobe":50},
+ {"nprobe":100},
+ {"nprobe":200},
+ {"nprobe":500},
+ {"nprobe":1000}
+ ],
+ "search_result_file" : "result/glove-100-inner/faiss_ivf_flat/nlist8192"
+ },
+
+ {
+ "name" : "faiss_ivf_flat.nlist16384",
+ "algo" : "faiss_gpu_ivf_flat",
+ "build_param": {"nlist":16384},
+ "file" : "index/glove-100-inner/faiss_ivf_flat/nlist16384",
+ "search_params" : [
+ {"nprobe":1},
+ {"nprobe":5},
+ {"nprobe":10},
+ {"nprobe":50},
+ {"nprobe":100},
+ {"nprobe":200},
+ {"nprobe":500},
+ {"nprobe":1000},
+ {"nprobe":2000}
+ ],
+ "search_result_file" : "result/glove-100-inner/faiss_ivf_flat/nlist16384"
+ },
+
+
+
+ {
+ "name" : "faiss_ivf_pq.M2-nlist1024",
+ "algo" : "faiss_gpu_ivf_pq",
+ "build_param": {"nlist":1024, "M":2},
+ "file" : "index/glove-100-inner/faiss_ivf_pq/M2-nlist1024",
+ "search_params" : [
+ {"nprobe":1},
+ {"nprobe":5},
+ {"nprobe":10},
+ {"nprobe":50},
+ {"nprobe":100},
+ {"nprobe":200},
+ {"nprobe":500},
+ {"nprobe":1000}
+ ],
+ "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M2-nlist1024"
+ },
+
+ {
+ "name" : "faiss_ivf_pq.M2-nlist2048",
+ "algo" : "faiss_gpu_ivf_pq",
+ "build_param": {"nlist":2048, "M":2},
+ "file" : "index/glove-100-inner/faiss_ivf_pq/M2-nlist2048",
+ "search_params" : [
+ {"nprobe":1},
+ {"nprobe":5},
+ {"nprobe":10},
+ {"nprobe":50},
+ {"nprobe":100},
+ {"nprobe":200},
+ {"nprobe":500},
+ {"nprobe":1000}
+ ],
+ "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M2-nlist2048"
+ },
+
+ {
+ "name" : "faiss_ivf_pq.M2-nlist4096",
+ "algo" : "faiss_gpu_ivf_pq",
+ "build_param": {"nlist":4096, "M":2},
+ "file" : "index/glove-100-inner/faiss_ivf_pq/M2-nlist4096",
+ "search_params" : [
+ {"nprobe":1},
+ {"nprobe":5},
+ {"nprobe":10},
+ {"nprobe":50},
+ {"nprobe":100},
+ {"nprobe":200},
+ {"nprobe":500},
+ {"nprobe":1000}
+ ],
+ "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M2-nlist4096"
+ },
+
+ {
+ "name" : "faiss_ivf_pq.M2-nlist8192",
+ "algo" : "faiss_gpu_ivf_pq",
+ "build_param": {"nlist":8192, "M":2},
+ "file" : "index/glove-100-inner/faiss_ivf_pq/M2-nlist8192",
+ "search_params" : [
+ {"nprobe":1},
+ {"nprobe":5},
+ {"nprobe":10},
+ {"nprobe":50},
+ {"nprobe":100},
+ {"nprobe":200},
+ {"nprobe":500},
+ {"nprobe":1000}
+ ],
+ "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M2-nlist8192"
+ },
+
+ {
+ "name" : "faiss_ivf_pq.M2-nlist16384",
+ "algo" : "faiss_gpu_ivf_pq",
+ "build_param": {"nlist":16384, "M":2},
+ "file" : "index/glove-100-inner/faiss_ivf_pq/M2-nlist16384",
+ "search_params" : [
+ {"nprobe":1},
+ {"nprobe":5},
+ {"nprobe":10},
+ {"nprobe":50},
+ {"nprobe":100},
+ {"nprobe":200},
+ {"nprobe":500},
+ {"nprobe":1000},
+ {"nprobe":2000}
+ ],
+ "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M2-nlist16384"
+ },
+
+ {
+ "name" : "faiss_ivf_pq.M4-nlist1024",
+ "algo" : "faiss_gpu_ivf_pq",
+ "build_param": {"nlist":1024, "M":4},
+ "file" : "index/glove-100-inner/faiss_ivf_pq/M4-nlist1024",
+ "search_params" : [
+ {"nprobe":1},
+ {"nprobe":5},
+ {"nprobe":10},
+ {"nprobe":50},
+ {"nprobe":100},
+ {"nprobe":200},
+ {"nprobe":500},
+ {"nprobe":1000}
+ ],
+ "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M4-nlist1024"
+ },
+
+ {
+ "name" : "faiss_ivf_pq.M4-nlist2048",
+ "algo" : "faiss_gpu_ivf_pq",
+ "build_param": {"nlist":2048, "M":4},
+ "file" : "index/glove-100-inner/faiss_ivf_pq/M4-nlist2048",
+ "search_params" : [
+ {"nprobe":1},
+ {"nprobe":5},
+ {"nprobe":10},
+ {"nprobe":50},
+ {"nprobe":100},
+ {"nprobe":200},
+ {"nprobe":500},
+ {"nprobe":1000}
+ ],
+ "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M4-nlist2048"
+ },
+
+ {
+ "name" : "faiss_ivf_pq.M4-nlist4096",
+ "algo" : "faiss_gpu_ivf_pq",
+ "build_param": {"nlist":4096, "M":4},
+ "file" : "index/glove-100-inner/faiss_ivf_pq/M4-nlist4096",
+ "search_params" : [
+ {"nprobe":1},
+ {"nprobe":5},
+ {"nprobe":10},
+ {"nprobe":50},
+ {"nprobe":100},
+ {"nprobe":200},
+ {"nprobe":500},
+ {"nprobe":1000}
+ ],
+ "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M4-nlist4096"
+ },
+
+ {
+ "name" : "faiss_ivf_pq.M4-nlist8192",
+ "algo" : "faiss_gpu_ivf_pq",
+ "build_param": {"nlist":8192, "M":4},
+ "file" : "index/glove-100-inner/faiss_ivf_pq/M4-nlist8192",
+ "search_params" : [
+ {"nprobe":1},
+ {"nprobe":5},
+ {"nprobe":10},
+ {"nprobe":50},
+ {"nprobe":100},
+ {"nprobe":200},
+ {"nprobe":500},
+ {"nprobe":1000}
+ ],
+ "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M4-nlist8192"
+ },
+
+ {
+ "name" : "faiss_ivf_pq.M4-nlist16384",
+ "algo" : "faiss_gpu_ivf_pq",
+ "build_param": {"nlist":16384, "M":4},
+ "file" : "index/glove-100-inner/faiss_ivf_pq/M4-nlist16384",
+ "search_params" : [
+ {"nprobe":1},
+ {"nprobe":5},
+ {"nprobe":10},
+ {"nprobe":50},
+ {"nprobe":100},
+ {"nprobe":200},
+ {"nprobe":500},
+ {"nprobe":1000},
+ {"nprobe":2000}
+ ],
+ "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M4-nlist16384"
+ },
+
+ {
+ "name" : "faiss_ivf_pq.M20-nlist1024",
+ "algo" : "faiss_gpu_ivf_pq",
+ "build_param": {"nlist":1024, "M":20},
+ "file" : "index/glove-100-inner/faiss_ivf_pq/M20-nlist1024",
+ "search_params" : [
+ {"nprobe":1},
+ {"nprobe":5},
+ {"nprobe":10},
+ {"nprobe":50},
+ {"nprobe":100},
+ {"nprobe":200},
+ {"nprobe":500},
+ {"nprobe":1000}
+ ],
+ "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M20-nlist1024"
+ },
+
+ {
+ "name" : "faiss_ivf_pq.M20-nlist2048",
+ "algo" : "faiss_gpu_ivf_pq",
+ "build_param": {"nlist":2048, "M":20},
+ "file" : "index/glove-100-inner/faiss_ivf_pq/M20-nlist2048",
+ "search_params" : [
+ {"nprobe":1},
+ {"nprobe":5},
+ {"nprobe":10},
+ {"nprobe":50},
+ {"nprobe":100},
+ {"nprobe":200},
+ {"nprobe":500},
+ {"nprobe":1000}
+ ],
+ "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M20-nlist2048"
+ },
+
+ {
+ "name" : "faiss_ivf_pq.M20-nlist4096",
+ "algo" : "faiss_gpu_ivf_pq",
+ "build_param": {"nlist":4096, "M":20},
+ "file" : "index/glove-100-inner/faiss_ivf_pq/M20-nlist4096",
+ "search_params" : [
+ {"nprobe":1},
+ {"nprobe":5},
+ {"nprobe":10},
+ {"nprobe":50},
+ {"nprobe":100},
+ {"nprobe":200},
+ {"nprobe":500},
+ {"nprobe":1000}
+ ],
+ "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M20-nlist4096"
+ },
+
+ {
+ "name" : "faiss_ivf_pq.M20-nlist8192",
+ "algo" : "faiss_gpu_ivf_pq",
+ "build_param": {"nlist":8192, "M":20},
+ "file" : "index/glove-100-inner/faiss_ivf_pq/M20-nlist8192",
+ "search_params" : [
+ {"nprobe":1},
+ {"nprobe":5},
+ {"nprobe":10},
+ {"nprobe":50},
+ {"nprobe":100},
+ {"nprobe":200},
+ {"nprobe":500},
+ {"nprobe":1000}
+ ],
+ "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M20-nlist8192"
+ },
+
+ {
+ "name" : "faiss_ivf_pq.M20-nlist16384",
+ "algo" : "faiss_gpu_ivf_pq",
+ "build_param": {"nlist":16384, "M":20},
+ "file" : "index/glove-100-inner/faiss_ivf_pq/M20-nlist16384",
+ "search_params" : [
+ {"nprobe":1},
+ {"nprobe":5},
+ {"nprobe":10},
+ {"nprobe":50},
+ {"nprobe":100},
+ {"nprobe":200},
+ {"nprobe":500},
+ {"nprobe":1000},
+ {"nprobe":2000}
+ ],
+ "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M20-nlist16384"
+ },
+
+
+ {
+ "name" : "faiss_ivf_sq.nlist1024-fp16",
+ "algo" : "faiss_gpu_ivf_sq",
+ "build_param": {"nlist":1024, "quantizer_type":"fp16"},
+ "file" : "index/glove-100-inner/faiss_ivf_sq/nlist1024-fp16",
+ "search_params" : [
+ {"nprobe":1},
+ {"nprobe":5},
+ {"nprobe":10},
+ {"nprobe":50},
+ {"nprobe":100},
+ {"nprobe":200},
+ {"nprobe":500},
+ {"nprobe":1000}
+ ],
+ "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist1024-fp16"
+ },
+
+ {
+ "name" : "faiss_ivf_sq.nlist2048-fp16",
+ "algo" : "faiss_gpu_ivf_sq",
+ "build_param": {"nlist":2048, "quantizer_type":"fp16"},
+ "file" : "index/glove-100-inner/faiss_ivf_sq/nlist2048-fp16",
+ "search_params" : [
+ {"nprobe":1},
+ {"nprobe":5},
+ {"nprobe":10},
+ {"nprobe":50},
+ {"nprobe":100},
+ {"nprobe":200},
+ {"nprobe":500},
+ {"nprobe":1000}
+ ],
+ "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist2048-fp16"
+ },
+
+ {
+ "name" : "faiss_ivf_sq.nlist4096-fp16",
+ "algo" : "faiss_gpu_ivf_sq",
+ "build_param": {"nlist":4096, "quantizer_type":"fp16"},
+ "file" : "index/glove-100-inner/faiss_ivf_sq/nlist4096-fp16",
+ "search_params" : [
+ {"nprobe":1},
+ {"nprobe":5},
+ {"nprobe":10},
+ {"nprobe":50},
+ {"nprobe":100},
+ {"nprobe":200},
+ {"nprobe":500},
+ {"nprobe":1000}
+ ],
+ "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist4096-fp16"
+ },
+
+ {
+ "name" : "faiss_ivf_sq.nlist8192-fp16",
+ "algo" : "faiss_gpu_ivf_sq",
+ "build_param": {"nlist":8192, "quantizer_type":"fp16"},
+ "file" : "index/glove-100-inner/faiss_ivf_sq/nlist8192-fp16",
+ "search_params" : [
+ {"nprobe":1},
+ {"nprobe":5},
+ {"nprobe":10},
+ {"nprobe":50},
+ {"nprobe":100},
+ {"nprobe":200},
+ {"nprobe":500},
+ {"nprobe":1000}
+ ],
+ "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist8192-fp16"
+ },
+
+ {
+ "name" : "faiss_ivf_sq.nlist16384-fp16",
+ "algo" : "faiss_gpu_ivf_sq",
+ "build_param": {"nlist":16384, "quantizer_type":"fp16"},
+ "file" : "index/glove-100-inner/faiss_ivf_sq/nlist16384-fp16",
+ "search_params" : [
+ {"nprobe":1},
+ {"nprobe":5},
+ {"nprobe":10},
+ {"nprobe":50},
+ {"nprobe":100},
+ {"nprobe":200},
+ {"nprobe":500},
+ {"nprobe":1000},
+ {"nprobe":2000}
+ ],
+ "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist16384-fp16"
+ },
+
+
+ {
+ "name" : "faiss_ivf_sq.nlist1024-int8",
+ "algo" : "faiss_gpu_ivf_sq",
+ "build_param": {"nlist":1024, "quantizer_type":"int8"},
+ "file" : "index/glove-100-inner/faiss_ivf_sq/nlist1024-int8",
+ "search_params" : [
+ {"nprobe":1},
+ {"nprobe":5},
+ {"nprobe":10},
+ {"nprobe":50},
+ {"nprobe":100},
+ {"nprobe":200},
+ {"nprobe":500},
+ {"nprobe":1000}
+ ],
+ "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist1024-int8"
+ },
+
+ {
+ "name" : "faiss_ivf_sq.nlist2048-int8",
+ "algo" : "faiss_gpu_ivf_sq",
+ "build_param": {"nlist":2048, "quantizer_type":"int8"},
+ "file" : "index/glove-100-inner/faiss_ivf_sq/nlist2048-int8",
+ "search_params" : [
+ {"nprobe":1},
+ {"nprobe":5},
+ {"nprobe":10},
+ {"nprobe":50},
+ {"nprobe":100},
+ {"nprobe":200},
+ {"nprobe":500},
+ {"nprobe":1000}
+ ],
+ "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist2048-int8"
+ },
+
+ {
+ "name" : "faiss_ivf_sq.nlist4096-int8",
+ "algo" : "faiss_gpu_ivf_sq",
+ "build_param": {"nlist":4096, "quantizer_type":"int8"},
+ "file" : "index/glove-100-inner/faiss_ivf_sq/nlist4096-int8",
+ "search_params" : [
+ {"nprobe":1},
+ {"nprobe":5},
+ {"nprobe":10},
+ {"nprobe":50},
+ {"nprobe":100},
+ {"nprobe":200},
+ {"nprobe":500},
+ {"nprobe":1000}
+ ],
+ "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist4096-int8"
+ },
+
+ {
+ "name" : "faiss_ivf_sq.nlist8192-int8",
+ "algo" : "faiss_gpu_ivf_sq",
+ "build_param": {"nlist":8192, "quantizer_type":"int8"},
+ "file" : "index/glove-100-inner/faiss_ivf_sq/nlist8192-int8",
+ "search_params" : [
+ {"nprobe":1},
+ {"nprobe":5},
+ {"nprobe":10},
+ {"nprobe":50},
+ {"nprobe":100},
+ {"nprobe":200},
+ {"nprobe":500},
+ {"nprobe":1000}
+ ],
+ "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist8192-int8"
+ },
+
+ {
+ "name" : "faiss_ivf_sq.nlist16384-int8",
+ "algo" : "faiss_gpu_ivf_sq",
+ "build_param": {"nlist":16384, "quantizer_type":"int8"},
+ "file" : "index/glove-100-inner/faiss_ivf_sq/nlist16384-int8",
+ "search_params" : [
+ {"nprobe":1},
+ {"nprobe":5},
+ {"nprobe":10},
+ {"nprobe":50},
+ {"nprobe":100},
+ {"nprobe":200},
+ {"nprobe":500},
+ {"nprobe":1000},
+ {"nprobe":2000}
+ ],
+ "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist16384-int8"
+ },
+
+ {
+ "name" : "faiss_flat",
+ "algo" : "faiss_gpu_flat",
+ "build_param": {},
+ "file" : "index/glove-100-inner/faiss_flat/flat",
+ "search_params" : [{}],
+ "search_result_file" : "result/glove-100-inner/faiss_flat/flat"
+ },
+
+ {
+ "name" : "ggnn.kbuild96-segment64-refine2-k10",
+ "algo" : "ggnn",
+ "build_param": {
+ "k_build": 96,
+ "segment_size": 64,
+ "refine_iterations": 2,
+ "dataset_size": 1183514,
+ "k": 10
+ },
+ "file" : "index/glove-100-inner/ggnn/kbuild96-segment64-refine2-k10",
+ "search_params" : [
+ {"tau":0.001, "block_dim":64, "sorted_size":32},
+ {"tau":0.005, "block_dim":64, "sorted_size":32},
+ {"tau":0.01, "block_dim":64, "sorted_size":32},
+ {"tau":0.02, "block_dim":64, "sorted_size":32},
+ {"tau":0.03, "block_dim":64, "sorted_size":32},
+ {"tau":0.04, "block_dim":64, "sorted_size":32},
+ {"tau":0.05, "block_dim":64, "sorted_size":32},
+ {"tau":0.06, "block_dim":64, "sorted_size":32},
+ {"tau":0.09, "block_dim":64, "sorted_size":32},
+ {"tau":0.12, "block_dim":64, "sorted_size":32},
+ {"tau":0.18, "block_dim":64, "sorted_size":32},
+ {"tau":0.21, "block_dim":64, "sorted_size":32},
+ {"tau":0.24, "block_dim":64, "sorted_size":32},
+ {"tau":0.27, "block_dim":64, "sorted_size":32},
+ {"tau":0.3, "block_dim":64, "sorted_size":32},
+ {"tau":0.4, "block_dim":64, "sorted_size":32},
+ {"tau":0.01, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32},
+ {"tau":0.02, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32},
+ {"tau":0.03, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32},
+ {"tau":0.04, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32},
+ {"tau":0.05, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32},
+ {"tau":0.06, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32},
+ {"tau":0.09, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32},
+ {"tau":0.12, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32},
+ {"tau":0.18, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32},
+ {"tau":0.21, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32},
+ {"tau":0.24, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32},
+ {"tau":0.27, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32},
+ {"tau":0.3, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32},
+ {"tau":0.4, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32},
+ {"tau":0.5, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}
+
+ ],
+ "search_result_file" : "result/glove-100-inner/ggnn/kbuild96-segment64-refine2-k10"
+ }
+
+
+ ]
+
+}
diff --git a/cpp/bench/ann/conf/sift-128-euclidean.json b/cpp/bench/ann/conf/sift-128-euclidean.json
new file mode 100644
index 0000000000..476c363ecd
--- /dev/null
+++ b/cpp/bench/ann/conf/sift-128-euclidean.json
@@ -0,0 +1,1321 @@
+{
+ "dataset": {
+ "name": "sift-128-euclidean",
+ "base_file": "data/sift-128-euclidean/base.fbin",
+ "query_file": "data/sift-128-euclidean/query.fbin",
+ "distance": "euclidean"
+ },
+ "search_basic_param": {
+ "batch_size": 5000,
+ "k": 10,
+ "run_count": 3
+ },
+ "index": [
+ {
+ "name" : "hnswlib.M12",
+ "algo" : "hnswlib",
+ "build_param": {"M":12, "efConstruction":500, "numThreads":32},
+ "file" : "index/sift-128-euclidean/hnswlib/M12",
+ "search_params" : [
+ {"ef":10, "numThreads":1},
+ {"ef":20, "numThreads":1},
+ {"ef":40, "numThreads":1},
+ {"ef":60, "numThreads":1},
+ {"ef":80, "numThreads":1},
+ {"ef":120, "numThreads":1},
+ {"ef":200, "numThreads":1},
+ {"ef":400, "numThreads":1},
+ {"ef":600, "numThreads":1},
+ {"ef":800, "numThreads":1}
+ ],
+ "search_result_file" : "result/sift-128-euclidean/hnswlib/M12"
+ },
+ {
+ "name" : "hnswlib.M16",
+ "algo" : "hnswlib",
+ "build_param": {"M":16, "efConstruction":500, "numThreads":32},
+ "file" : "index/sift-128-euclidean/hnswlib/M16",
+ "search_params" : [
+ {"ef":10, "numThreads":1},
+ {"ef":20, "numThreads":1},
+ {"ef":40, "numThreads":1},
+ {"ef":60, "numThreads":1},
+ {"ef":80, "numThreads":1},
+ {"ef":120, "numThreads":1},
+ {"ef":200, "numThreads":1},
+ {"ef":400, "numThreads":1},
+ {"ef":600, "numThreads":1},
+ {"ef":800, "numThreads":1}
+ ],
+ "search_result_file" : "result/sift-128-euclidean/hnswlib/M16"
+ },
+ {
+ "name" : "hnswlib.M24",
+ "algo" : "hnswlib",
+ "build_param": {"M":24, "efConstruction":500, "numThreads":32},
+ "file" : "index/sift-128-euclidean/hnswlib/M24",
+ "search_params" : [
+ {"ef":10, "numThreads":1},
+ {"ef":20, "numThreads":1},
+ {"ef":40, "numThreads":1},
+ {"ef":60, "numThreads":1},
+ {"ef":80, "numThreads":1},
+ {"ef":120, "numThreads":1},
+ {"ef":200, "numThreads":1},
+ {"ef":400, "numThreads":1},
+ {"ef":600, "numThreads":1},
+ {"ef":800, "numThreads":1}
+ ],
+ "search_result_file" : "result/sift-128-euclidean/hnswlib/M24"
+ },
+ {
+ "name" : "hnswlib.M36",
+ "algo" : "hnswlib",
+ "build_param": {"M":36, "efConstruction":500, "numThreads":32},
+ "file" : "index/sift-128-euclidean/hnswlib/M36",
+ "search_params" : [
+ {"ef":10, "numThreads":1},
+ {"ef":20, "numThreads":1},
+ {"ef":40, "numThreads":1},
+ {"ef":60, "numThreads":1},
+ {"ef":80, "numThreads":1},
+ {"ef":120, "numThreads":1},
+ {"ef":200, "numThreads":1},
+ {"ef":400, "numThreads":1},
+ {"ef":600, "numThreads":1},
+ {"ef":800, "numThreads":1}
+ ],
+ "search_result_file" : "result/sift-128-euclidean/hnswlib/M36"
+ },
+
+
+
+
+ {
+ "name": "raft_bfknn",
+ "algo": "raft_bfknn",
+ "build_param": {},
+ "file": "index/sift-128-euclidean/raft_bfknn/bfknn",
+ "search_params": [
+ {
+ "probe": 1
+ }
+ ],
+ "search_result_file": "result/sift-128-euclidean/raft_bfknn/bfknn"
+ },
+ {
+ "name": "faiss_ivf_flat.nlist1024",
+ "algo": "faiss_gpu_ivf_flat",
+ "build_param": {
+ "nlist": 1024
+ },
+ "file": "index/sift-128-euclidean/faiss_ivf_flat/nlist1024",
+ "search_params": [
+ {
+ "nprobe": 1
+ },
+ {
+ "nprobe": 5
+ },
+ {
+ "nprobe": 10
+ },
+ {
+ "nprobe": 50
+ },
+ {
+ "nprobe": 100
+ },
+ {
+ "nprobe": 200
+ },
+ {
+ "nprobe": 500
+ },
+ {
+ "nprobe": 1000
+ }
+ ],
+ "search_result_file": "result/sift-128-euclidean/faiss_ivf_flat/nlist1024"
+ },
+ {
+ "name": "faiss_ivf_flat.nlist2048",
+ "algo": "faiss_gpu_ivf_flat",
+ "build_param": {
+ "nlist": 2048
+ },
+ "file": "index/sift-128-euclidean/faiss_ivf_flat/nlist2048",
+ "search_params": [
+ {
+ "nprobe": 1
+ },
+ {
+ "nprobe": 5
+ },
+ {
+ "nprobe": 10
+ },
+ {
+ "nprobe": 50
+ },
+ {
+ "nprobe": 100
+ },
+ {
+ "nprobe": 200
+ },
+ {
+ "nprobe": 500
+ },
+ {
+ "nprobe": 1000
+ }
+ ],
+ "search_result_file": "result/sift-128-euclidean/faiss_ivf_flat/nlist2048"
+ },
+ {
+ "name": "faiss_ivf_flat.nlist4096",
+ "algo": "faiss_gpu_ivf_flat",
+ "build_param": {
+ "nlist": 4096
+ },
+ "file": "index/sift-128-euclidean/faiss_ivf_flat/nlist4096",
+ "search_params": [
+ {
+ "nprobe": 1
+ },
+ {
+ "nprobe": 5
+ },
+ {
+ "nprobe": 10
+ },
+ {
+ "nprobe": 50
+ },
+ {
+ "nprobe": 100
+ },
+ {
+ "nprobe": 200
+ },
+ {
+ "nprobe": 500
+ },
+ {
+ "nprobe": 1000
+ }
+ ],
+ "search_result_file": "result/sift-128-euclidean/faiss_ivf_flat/nlist4096"
+ },
+ {
+ "name": "faiss_ivf_flat.nlist8192",
+ "algo": "faiss_gpu_ivf_flat",
+ "build_param": {
+ "nlist": 8192
+ },
+ "file": "index/sift-128-euclidean/faiss_ivf_flat/nlist8192",
+ "search_params": [
+ {
+ "nprobe": 1
+ },
+ {
+ "nprobe": 5
+ },
+ {
+ "nprobe": 10
+ },
+ {
+ "nprobe": 50
+ },
+ {
+ "nprobe": 100
+ },
+ {
+ "nprobe": 200
+ },
+ {
+ "nprobe": 500
+ },
+ {
+ "nprobe": 1000
+ }
+ ],
+ "search_result_file": "result/sift-128-euclidean/faiss_ivf_flat/nlist8192"
+ },
+ {
+ "name": "faiss_ivf_flat.nlist16384",
+ "algo": "faiss_gpu_ivf_flat",
+ "build_param": {
+ "nlist": 16384
+ },
+ "file": "index/sift-128-euclidean/faiss_ivf_flat/nlist16384",
+ "search_params": [
+ {
+ "nprobe": 1
+ },
+ {
+ "nprobe": 5
+ },
+ {
+ "nprobe": 10
+ },
+ {
+ "nprobe": 50
+ },
+ {
+ "nprobe": 100
+ },
+ {
+ "nprobe": 200
+ },
+ {
+ "nprobe": 500
+ },
+ {
+ "nprobe": 1000
+ },
+ {
+ "nprobe": 2000
+ }
+ ],
+ "search_result_file": "result/sift-128-euclidean/faiss_ivf_flat/nlist16384"
+ },
+ {
+ "name": "faiss_ivf_pq.M64-nlist1024",
+ "algo": "faiss_gpu_ivf_pq",
+ "build_param": {
+ "nlist": 1024,
+ "M": 64,
+ "useFloat16": true,
+ "usePrecomputed": true
+ },
+ "file": "index/sift-128-euclidean/faiss_ivf_pq/M64-nlist1024",
+ "search_params": [
+ {
+ "nprobe": 10
+ },
+ {
+ "nprobe": 50
+ },
+ {
+ "nprobe": 100
+ },
+ {
+ "nprobe": 200
+ },
+ {
+ "nprobe": 500
+ },
+ {
+ "nprobe": 1000
+ }
+ ],
+ "search_result_file": "result/sift-128-euclidean/faiss_ivf_pq/M64-nlist1024"
+ },
+ {
+ "name": "faiss_ivf_pq.M64-nlist1024.noprecomp",
+ "algo": "faiss_gpu_ivf_pq",
+ "build_param": {
+ "nlist": 1024,
+ "M": 64,
+ "useFloat16": true,
+ "usePrecomputed": false
+ },
+ "file": "index/sift-128-euclidean/faiss_ivf_pq/M64-nlist1024.noprecomp",
+ "search_params": [
+ {
+ "nprobe": 10
+ },
+ {
+ "nprobe": 50
+ },
+ {
+ "nprobe": 100
+ },
+ {
+ "nprobe": 200
+ },
+ {
+ "nprobe": 500
+ },
+ {
+ "nprobe": 1000
+ }
+ ],
+ "search_result_file": "result/sift-128-euclidean/faiss_ivf_pq/M64-nlist1024.noprecomp"
+ },
+ {
+ "name": "faiss_ivf_sq.nlist1024-fp16",
+ "algo": "faiss_gpu_ivf_sq",
+ "build_param": {
+ "nlist": 1024,
+ "quantizer_type": "fp16"
+ },
+ "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist1024-fp16",
+ "search_params": [
+ {
+ "nprobe": 1
+ },
+ {
+ "nprobe": 5
+ },
+ {
+ "nprobe": 10
+ },
+ {
+ "nprobe": 50
+ },
+ {
+ "nprobe": 100
+ },
+ {
+ "nprobe": 200
+ },
+ {
+ "nprobe": 500
+ },
+ {
+ "nprobe": 1000
+ }
+ ],
+ "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist1024-fp16"
+ },
+ {
+ "name": "faiss_ivf_sq.nlist2048-fp16",
+ "algo": "faiss_gpu_ivf_sq",
+ "build_param": {
+ "nlist": 2048,
+ "quantizer_type": "fp16"
+ },
+ "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist2048-fp16",
+ "search_params": [
+ {
+ "nprobe": 1
+ },
+ {
+ "nprobe": 5
+ },
+ {
+ "nprobe": 10
+ },
+ {
+ "nprobe": 50
+ },
+ {
+ "nprobe": 100
+ },
+ {
+ "nprobe": 200
+ },
+ {
+ "nprobe": 500
+ },
+ {
+ "nprobe": 1000
+ }
+ ],
+ "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist2048-fp16"
+ },
+ {
+ "name": "faiss_ivf_sq.nlist4096-fp16",
+ "algo": "faiss_gpu_ivf_sq",
+ "build_param": {
+ "nlist": 4096,
+ "quantizer_type": "fp16"
+ },
+ "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist4096-fp16",
+ "search_params": [
+ {
+ "nprobe": 1
+ },
+ {
+ "nprobe": 5
+ },
+ {
+ "nprobe": 10
+ },
+ {
+ "nprobe": 50
+ },
+ {
+ "nprobe": 100
+ },
+ {
+ "nprobe": 200
+ },
+ {
+ "nprobe": 500
+ },
+ {
+ "nprobe": 1000
+ }
+ ],
+ "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist4096-fp16"
+ },
+ {
+ "name": "faiss_ivf_sq.nlist8192-fp16",
+ "algo": "faiss_gpu_ivf_sq",
+ "build_param": {
+ "nlist": 8192,
+ "quantizer_type": "fp16"
+ },
+ "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist8192-fp16",
+ "search_params": [
+ {
+ "nprobe": 1
+ },
+ {
+ "nprobe": 5
+ },
+ {
+ "nprobe": 10
+ },
+ {
+ "nprobe": 50
+ },
+ {
+ "nprobe": 100
+ },
+ {
+ "nprobe": 200
+ },
+ {
+ "nprobe": 500
+ },
+ {
+ "nprobe": 1000
+ }
+ ],
+ "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist8192-fp16"
+ },
+ {
+ "name": "faiss_ivf_sq.nlist16384-fp16",
+ "algo": "faiss_gpu_ivf_sq",
+ "build_param": {
+ "nlist": 16384,
+ "quantizer_type": "fp16"
+ },
+ "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist16384-fp16",
+ "search_params": [
+ {
+ "nprobe": 1
+ },
+ {
+ "nprobe": 5
+ },
+ {
+ "nprobe": 10
+ },
+ {
+ "nprobe": 50
+ },
+ {
+ "nprobe": 100
+ },
+ {
+ "nprobe": 200
+ },
+ {
+ "nprobe": 500
+ },
+ {
+ "nprobe": 1000
+ },
+ {
+ "nprobe": 2000
+ }
+ ],
+ "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist16384-fp16"
+ },
+ {
+ "name": "faiss_ivf_sq.nlist1024-int8",
+ "algo": "faiss_gpu_ivf_sq",
+ "build_param": {
+ "nlist": 1024,
+ "quantizer_type": "int8"
+ },
+ "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist1024-int8",
+ "search_params": [
+ {
+ "nprobe": 1
+ },
+ {
+ "nprobe": 5
+ },
+ {
+ "nprobe": 10
+ },
+ {
+ "nprobe": 50
+ },
+ {
+ "nprobe": 100
+ },
+ {
+ "nprobe": 200
+ },
+ {
+ "nprobe": 500
+ },
+ {
+ "nprobe": 1000
+ }
+ ],
+ "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist1024-int8"
+ },
+ {
+ "name": "faiss_ivf_sq.nlist2048-int8",
+ "algo": "faiss_gpu_ivf_sq",
+ "build_param": {
+ "nlist": 2048,
+ "quantizer_type": "int8"
+ },
+ "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist2048-int8",
+ "search_params": [
+ {
+ "nprobe": 1
+ },
+ {
+ "nprobe": 5
+ },
+ {
+ "nprobe": 10
+ },
+ {
+ "nprobe": 50
+ },
+ {
+ "nprobe": 100
+ },
+ {
+ "nprobe": 200
+ },
+ {
+ "nprobe": 500
+ },
+ {
+ "nprobe": 1000
+ }
+ ],
+ "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist2048-int8"
+ },
+ {
+ "name": "faiss_ivf_sq.nlist4096-int8",
+ "algo": "faiss_gpu_ivf_sq",
+ "build_param": {
+ "nlist": 4096,
+ "quantizer_type": "int8"
+ },
+ "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist4096-int8",
+ "search_params": [
+ {
+ "nprobe": 1
+ },
+ {
+ "nprobe": 5
+ },
+ {
+ "nprobe": 10
+ },
+ {
+ "nprobe": 50
+ },
+ {
+ "nprobe": 100
+ },
+ {
+ "nprobe": 200
+ },
+ {
+ "nprobe": 500
+ },
+ {
+ "nprobe": 1000
+ }
+ ],
+ "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist4096-int8"
+ },
+ {
+ "name": "faiss_ivf_sq.nlist8192-int8",
+ "algo": "faiss_gpu_ivf_sq",
+ "build_param": {
+ "nlist": 8192,
+ "quantizer_type": "int8"
+ },
+ "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist8192-int8",
+ "search_params": [
+ {
+ "nprobe": 1
+ },
+ {
+ "nprobe": 5
+ },
+ {
+ "nprobe": 10
+ },
+ {
+ "nprobe": 50
+ },
+ {
+ "nprobe": 100
+ },
+ {
+ "nprobe": 200
+ },
+ {
+ "nprobe": 500
+ },
+ {
+ "nprobe": 1000
+ }
+ ],
+ "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist8192-int8"
+ },
+ {
+ "name": "faiss_ivf_sq.nlist16384-int8",
+ "algo": "faiss_gpu_ivf_sq",
+ "build_param": {
+ "nlist": 16384,
+ "quantizer_type": "int8"
+ },
+ "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist16384-int8",
+ "search_params": [
+ {
+ "nprobe": 1
+ },
+ {
+ "nprobe": 5
+ },
+ {
+ "nprobe": 10
+ },
+ {
+ "nprobe": 50
+ },
+ {
+ "nprobe": 100
+ },
+ {
+ "nprobe": 200
+ },
+ {
+ "nprobe": 500
+ },
+ {
+ "nprobe": 1000
+ },
+ {
+ "nprobe": 2000
+ }
+ ],
+ "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist16384-int8"
+ },
+ {
+ "name": "faiss_flat",
+ "algo": "faiss_gpu_flat",
+ "build_param": {},
+ "file": "index/sift-128-euclidean/faiss_flat/flat",
+ "search_params": [
+ {}
+ ],
+ "search_result_file": "result/sift-128-euclidean/faiss_flat/flat"
+ },
+
+ {
+ "name": "raft_ivf_pq.dimpq128-cluster1024",
+ "algo": "raft_ivf_pq",
+ "build_param": {
+ "nlist": 1024,
+ "pq_dim": 128,
+ "ratio": 1,
+ "niter": 25
+ },
+ "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024",
+ "search_params": [
+ {
+ "k": 10,
+ "numProbes": 10,
+ "internalDistanceDtype": "half",
+ "smemLutDtype": "half"
+ },
+ {
+ "k": 10,
+ "numProbes": 50,
+ "internalDistanceDtype": "half",
+ "smemLutDtype": "half"
+ },
+ {
+ "k": 10,
+ "numProbes": 100,
+ "internalDistanceDtype": "half",
+ "smemLutDtype": "half"
+ },
+ {
+ "k": 10,
+ "numProbes": 200,
+ "internalDistanceDtype": "half",
+ "smemLutDtype": "half"
+ },
+ {
+ "k": 10,
+ "numProbes": 500,
+ "internalDistanceDtype": "half",
+ "smemLutDtype": "half"
+ },
+ {
+ "k": 10,
+ "numProbes": 1024,
+ "internalDistanceDtype": "half",
+ "smemLutDtype": "half"
+ }
+ ],
+ "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024"
+ },
+ {
+ "name": "raft_ivf_pq.dimpq128-cluster1024-float-float",
+ "algo": "raft_ivf_pq",
+ "build_param": {
+ "nlist": 1024,
+ "pq_dim": 128,
+ "ratio": 1,
+ "niter": 25
+ },
+ "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-float",
+ "search_params": [
+ {
+ "k": 10,
+ "numProbes": 1,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "float"
+ },
+ {
+ "k": 10,
+ "numProbes": 1,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "float"
+ },
+ {
+ "k": 10,
+ "numProbes": 5,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "float"
+ },
+ {
+ "k": 10,
+ "numProbes": 10,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "float"
+ },
+ {
+ "k": 10,
+ "numProbes": 50,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "float"
+ },
+ {
+ "k": 10,
+ "numProbes": 100,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "float"
+ },
+ {
+ "k": 10,
+ "numProbes": 200,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "float"
+ },
+ {
+ "k": 10,
+ "numProbes": 500,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "float"
+ },
+ {
+ "k": 10,
+ "numProbes": 1024,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "float"
+ }
+ ],
+ "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-float"
+ },
+ {
+ "name": "raft_ivf_pq.dimpq128-cluster1024-float-half",
+ "algo": "raft_ivf_pq",
+ "build_param": {
+ "nlist": 1024,
+ "pq_dim": 128,
+ "ratio": 1,
+ "niter": 25
+ },
+ "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-half",
+ "search_params": [
+ {
+ "k": 10,
+ "numProbes": 10,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "half"
+ },
+ {
+ "k": 10,
+ "numProbes": 50,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "half"
+ },
+ {
+ "k": 10,
+ "numProbes": 100,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "half"
+ },
+ {
+ "k": 10,
+ "numProbes": 200,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "half"
+ },
+ {
+ "k": 10,
+ "numProbes": 500,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "half"
+ },
+ {
+ "k": 10,
+ "numProbes": 1024,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "half"
+ }
+ ],
+ "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-half"
+ },
+ {
+ "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8",
+ "algo": "raft_ivf_pq",
+ "build_param": {
+ "nlist": 1024,
+ "pq_dim": 128,
+ "ratio": 1,
+ "niter": 25
+ },
+ "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-fp8",
+ "search_params": [
+ {
+ "k": 10,
+ "numProbes": 10,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "fp8"
+ },
+ {
+ "k": 10,
+ "numProbes": 50,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "fp8"
+ },
+ {
+ "k": 10,
+ "numProbes": 100,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "fp8"
+ },
+ {
+ "k": 10,
+ "numProbes": 200,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "fp8"
+ },
+ {
+ "k": 10,
+ "numProbes": 500,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "fp8"
+ },
+ {
+ "k": 10,
+ "numProbes": 1024,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "fp8"
+ }
+ ],
+ "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-fp8"
+ },
+ {
+ "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8",
+ "algo": "raft_ivf_pq",
+ "build_param": {
+ "nlist": 1024,
+ "pq_dim": 64,
+ "ratio": 1,
+ "niter": 25
+ },
+ "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-fp8",
+ "search_params": [
+ {
+ "k": 10,
+ "numProbes": 10,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "fp8"
+ },
+ {
+ "k": 10,
+ "numProbes": 50,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "fp8"
+ },
+ {
+ "k": 10,
+ "numProbes": 100,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "fp8"
+ },
+ {
+ "k": 10,
+ "numProbes": 200,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "fp8"
+ },
+ {
+ "k": 10,
+ "numProbes": 500,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "fp8"
+ },
+ {
+ "k": 10,
+ "numProbes": 1024,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "fp8"
+ }
+ ],
+ "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-fp8"
+ },
+ {
+ "name": "raft_ivf_pq.dimpq64-cluster1024-float-half",
+ "algo": "raft_ivf_pq",
+ "build_param": {
+ "nlist": 1024,
+ "pq_dim": 64,
+ "ratio": 1,
+ "niter": 25
+ },
+ "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-half",
+ "search_params": [
+ {
+ "k": 10,
+ "numProbes": 10,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "half"
+ },
+ {
+ "k": 10,
+ "numProbes": 50,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "half"
+ },
+ {
+ "k": 10,
+ "numProbes": 100,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "half"
+ },
+ {
+ "k": 10,
+ "numProbes": 200,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "half"
+ },
+ {
+ "k": 10,
+ "numProbes": 500,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "half"
+ },
+ {
+ "k": 10,
+ "numProbes": 1024,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "half"
+ }
+ ],
+ "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-half"
+ },
+ {
+ "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8",
+ "algo": "raft_ivf_pq",
+ "build_param": {
+ "nlist": 1024,
+ "pq_dim": 32,
+ "ratio": 1,
+ "niter": 25
+ },
+ "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq32-cluster1024-float-fp8",
+ "search_params": [
+ {
+ "k": 10,
+ "numProbes": 10,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "fp8"
+ },
+ {
+ "k": 10,
+ "numProbes": 50,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "fp8"
+ },
+ {
+ "k": 10,
+ "numProbes": 100,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "fp8"
+ },
+ {
+ "k": 10,
+ "numProbes": 200,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "fp8"
+ },
+ {
+ "k": 10,
+ "numProbes": 500,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "fp8"
+ },
+ {
+ "k": 10,
+ "numProbes": 1024,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "fp8"
+ }
+ ],
+ "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq32-cluster1024-float-fp8"
+ },
+ {
+ "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8",
+ "algo": "raft_ivf_pq",
+ "build_param": {
+ "nlist": 1024,
+ "pq_dim": 16,
+ "ratio": 1,
+ "niter": 25
+ },
+ "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq16-cluster1024-float-fp8",
+ "search_params": [
+ {
+ "k": 10,
+ "numProbes": 10,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "fp8"
+ },
+ {
+ "k": 10,
+ "numProbes": 50,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "fp8"
+ },
+ {
+ "k": 10,
+ "numProbes": 100,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "fp8"
+ },
+ {
+ "k": 10,
+ "numProbes": 200,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "fp8"
+ },
+ {
+ "k": 10,
+ "numProbes": 500,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "fp8"
+ },
+ {
+ "k": 10,
+ "numProbes": 1024,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "fp8"
+ }
+ ],
+ "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq16-cluster1024-float-fp8"
+ },
+ {
+ "name": "raft_ivf_pq.dimpq128-cluster1024-half-float",
+ "algo": "raft_ivf_pq",
+ "build_param": {
+ "nlist": 1024,
+ "pq_dim": 128,
+ "ratio": 1,
+ "niter": 25
+ },
+ "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024-half-float",
+ "search_params": [
+ {
+ "k": 10,
+ "numProbes": 10,
+ "internalDistanceDtype": "half",
+ "smemLutDtype": "float"
+ },
+ {
+ "k": 10,
+ "numProbes": 50,
+ "internalDistanceDtype": "half",
+ "smemLutDtype": "float"
+ },
+ {
+ "k": 10,
+ "numProbes": 100,
+ "internalDistanceDtype": "half",
+ "smemLutDtype": "float"
+ },
+ {
+ "k": 10,
+ "numProbes": 200,
+ "internalDistanceDtype": "half",
+ "smemLutDtype": "float"
+ },
+ {
+ "k": 10,
+ "numProbes": 500,
+ "internalDistanceDtype": "half",
+ "smemLutDtype": "float"
+ },
+ {
+ "k": 10,
+ "numProbes": 1024,
+ "internalDistanceDtype": "half",
+ "smemLutDtype": "float"
+ }
+ ],
+ "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024-half-float"
+ },
+ {
+ "name": "raft_ivf_pq.dimpq512-cluster1024-float-float",
+ "algo": "raft_ivf_pq",
+ "build_param": {
+ "nlist": 1024,
+ "pq_dim": 512,
+ "ratio": 1,
+ "niter": 25
+ },
+ "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq512-cluster1024-float-float",
+ "search_params": [
+ {
+ "k": 10,
+ "numProbes": 10,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "float"
+ },
+ {
+ "k": 10,
+ "numProbes": 50,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "float"
+ },
+ {
+ "k": 10,
+ "numProbes": 100,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "float"
+ },
+ {
+ "k": 10,
+ "numProbes": 200,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "float"
+ },
+ {
+ "k": 10,
+ "numProbes": 500,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "float"
+ },
+ {
+ "k": 10,
+ "numProbes": 1024,
+ "internalDistanceDtype": "float",
+ "smemLutDtype": "float"
+ }
+ ],
+ "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq512-cluster1024-float-float"
+ },
+ {
+ "name": "raft_ivf_flat.nlist1024",
+ "algo": "raft_ivf_flat",
+ "build_param": {
+ "nlist": 1024,
+ "ratio": 1,
+ "niter": 25
+ },
+ "file": "index/sift-128-euclidean/raft_ivf_flat/nlist1024",
+ "search_params": [
+ {
+ "nprobe": 1
+ },
+ {
+ "nprobe": 5
+ },
+ {
+ "nprobe": 10
+ },
+ {
+ "nprobe": 50
+ },
+ {
+ "nprobe": 100
+ },
+ {
+ "nprobe": 200
+ },
+ {
+ "nprobe": 500
+ },
+ {
+ "nprobe": 1000
+ }
+ ],
+ "search_result_file": "result/sift-128-euclidean/raft_ivf_flat/nlist1024"
+ },
+ {
+ "name": "raft_ivf_flat.nlist16384",
+ "algo": "raft_ivf_flat",
+ "build_param": {
+ "nlist": 16384,
+ "ratio": 2,
+ "niter": 20
+ },
+ "file": "index/sift-128-euclidean/raft_ivf_flat/nlist16384",
+ "search_params": [
+ {
+ "nprobe": 1
+ },
+ {
+ "nprobe": 5
+ },
+ {
+ "nprobe": 10
+ },
+ {
+ "nprobe": 50
+ },
+ {
+ "nprobe": 100
+ },
+ {
+ "nprobe": 200
+ },
+ {
+ "nprobe": 500
+ },
+ {
+ "nprobe": 1000
+ },
+ {
+ "nprobe": 2000
+ }
+ ],
+ "search_result_file": "result/sift-128-euclidean/raft_ivf_flat/nlist16384"
+ }
+ ]
+}
diff --git a/cpp/bench/ann/scripts/eval.pl b/cpp/bench/ann/scripts/eval.pl
new file mode 100755
index 0000000000..81c5563d79
--- /dev/null
+++ b/cpp/bench/ann/scripts/eval.pl
@@ -0,0 +1,430 @@
+#!/usr/bin/perl
+
+# =============================================================================
+# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+
+use warnings;
+use strict;
+use autodie qw(open close);
+use File::Find;
+use Getopt::Std;
+
+ # Metric labels. The one selected on the command line is stored in $metric and
+ # is also printed as the column header of the result table.
+ my $QPS = 'QPS';
+ my $AVG_LATENCY = 'avg_latency(ms)';
+ my $P99_LATENCY = 'p99_latency(ms)';
+ my $P999_LATENCY = 'p999_latency(ms)';
+ # Estimation targets as [condition, value] pairs. They are handed to
+ # calc_and_print_estimation(), which keeps only the 'recall' conditions and the
+ # ones whose condition equals the selected metric.
+ my @CONDITIONS = ([$QPS, 2000], ['recall', 0.9], ['recall', 0.95]);
+
+
+my $USAGE = << 'END';
+usage: [-f] [-l avg|p99|p999] [-o output.csv] groundtruth.neighbors.ibin result_paths...
+ result_paths... are paths to the search result files.
+ Can specify multiple paths.
+ For each of them, if it's a directory, all the .txt files found under
+ it recursively will be regarded as inputs.
+
+ -f: force to recompute recall and update it in result file if needed
+ -l: output search latency rather than QPS. Available options:
+ "avg" for average latency;
+ "p99" for 99th percentile latency;
+ "p999" for 99.9th percentile latency.
+ -o: also write result to a csv file
+END
+
+
+ # Command-line handling: -f (force recall recomputation), -l <kind> (report a
+ # latency instead of QPS) and -o <file> (also write a CSV). At least two
+ # positional arguments must remain afterwards.
+ my %opt;
+ die $USAGE unless getopts('fl:o:', \%opt);
+ my $force_calc_recall = exists $opt{f} ? 1 : 0;
+ my $csv_file = exists $opt{o} ? $opt{o} : undef;
+ my $metric = $QPS;
+ if (exists $opt{l}) {
+     # maps the value accepted by -l to the metric label it selects
+     my %metric_of = (
+         avg  => $AVG_LATENCY,
+         p99  => $P99_LATENCY,
+         p999 => $P999_LATENCY,
+     );
+     my $option = $opt{l};
+     exists $metric_of{$option}
+       or die
+       "[error] illegal value for '-l': '$option'. Must be 'avg', 'p99' or 'p999'\n";
+     $metric = $metric_of{$option};
+ }
+
+ die $USAGE if @ARGV < 2;
+
+
+ # Main flow: gather all results, compute any missing recall, print the result
+ # table (optionally mirrored to a CSV file) and finally the estimations.
+ my $truth_file = shift @ARGV;
+ my ($k, $dataset, $distance, $results) = get_all_results($metric, @ARGV);
+ if (!defined $k) {
+     print STDERR "no result file found\n";
+     exit -1;
+ }
+ print STDERR "dataset = $dataset, distance = $distance, k = $k\n\n";
+ calc_missing_recall($results, $truth_file, $force_calc_recall);
+
+ # Order by name, then by ascending recall, then by descending metric value.
+ my @results = sort {
+     $a->{name} cmp $b->{name}
+       or $a->{recall} <=> $b->{recall}
+       or $b->{qps} <=> $a->{qps}
+ } @$results;
+
+ # Latency values are multiplied by 1000 for display (the unit of latency is
+ # ms); QPS is printed unchanged.
+ my $display_scale = ($metric eq $QPS) ? 1 : 1000;
+ my $value_fmt = ($metric eq $QPS) ? '%16.1f' : '%16.3f';
+
+ printf("%-60s %6s %16s %s\n", '', 'Recall', $metric, 'search_param');
+ for my $result (@results) {
+     printf("%-60s %6.4f ${value_fmt} %s\n",
+         $result->{name}, $result->{recall},
+         $result->{qps} * $display_scale,
+         $result->{search_param});
+ }
+ if (defined $csv_file) {
+     open my $fh, '>', $csv_file;
+     print {$fh} ",Recall,${metric},search_param\n";
+     for my $result (@results) {
+         printf {$fh} (
+             "%s,%.4f,%.3f,%s\n", $result->{name}, $result->{recall},
+             $result->{qps} * $display_scale,
+             $result->{search_param}
+         );
+     }
+     # Close explicitly: autodie then reports a failed flush (e.g. disk full)
+     # instead of leaving a silently truncated CSV behind.
+     close $fh;
+ }
+ print "\n";
+ calc_and_print_estimation($results, $metric, \@CONDITIONS);
+
+
+
+
+ # Parse a "key: value" text file into a hash reference.
+ #
+ # Blank lines are skipped; every other line must contain a ':' (the first one
+ # separates key from value) or the function dies. Key and value are stripped
+ # of surrounding whitespace.
+ sub read_result {
+     my ($fname) = @_;
+     open my $fh, '<', $fname;
+     my %attr;
+     while (<$fh>) {
+         chomp;
+         next unless /\S/;
+
+         # split on the first ':' only; a missing ':' leaves $val undefined
+         my ($key, $val) = split /:/, $_, 2;
+         defined $val
+           or die "[error] no ':' is found: '$_'\n";
+         s/^\s+|\s+$//g for $key, $val;
+
+         # old version benchmark compatible: 'search_time' is stored under
+         # 'average_search_time' after multiplying by the batch size read so far
+         if ($key eq 'search_time') {
+             $key = 'average_search_time';
+             $val *= $attr{batch_size};
+         }
+         $attr{$key} = $val;
+     }
+     return \%attr;
+ }
+
+ # Rewrite $fname in place, replacing the value on every line that starts with
+ # "recall: " by $recall formatted with "%f". All other lines are kept as is.
+ sub overwrite_recall_to_result {
+     my ($fname, $recall) = @_;
+     $recall = sprintf("%f", $recall);
+
+     open my $fh_in, '<', $fname;
+     my @lines = <$fh_in>;
+     close $fh_in;
+
+     # '.' does not match the newline, so each line keeps its own line ending
+     s/^recall: .*/recall: $recall/ for @lines;
+
+     open my $fh_out, '>', $fname;
+     # printing a list is a no-op for an empty file (no undef warning)
+     print {$fh_out} @lines;
+     # explicit close so that autodie reports a failed write/flush
+     close $fh_out;
+ }
+
+ # Append a "recall: <value>" line (value formatted with "%f") to $fname.
+ sub append_recall_to_result {
+     my ($fname, $recall) = @_;
+     open my $fh, '>>', $fname;
+     printf {$fh} ("recall: %f\n", $recall);
+     # explicit close so that autodie reports a failed write/flush
+     close $fh;
+ }
+
+ # Collect every result file below the given paths and turn each into a record.
+ #
+ # Arguments: the metric label to extract, followed by the files/directories
+ # that File::Find walks looking for regular files whose name ends in ".txt".
+ # Returns ($k, $dataset, $distance, \@records); the first three stay undefined
+ # when no file was found. Dies when the files disagree on k, dataset or
+ # distance. Each record has the keys file, name, search_param and qps (the
+ # value of the selected metric), plus recall when the file already has one.
+ sub get_all_results {
+     my ($metric, @paths) = @_;
+
+     my %fname;
+     find(sub { $fname{$File::Find::name} = 1 if -f && /\.txt$/ }, @paths);
+
+     my ($k, $dataset, $distance);
+     my @results;
+     for my $f (sort keys %fname) {
+         print STDERR "reading $f ...\n";
+         my $attr = read_result($f);
+
+         if (defined $k) {
+             # all files must describe the same experiment as the first one
+             $attr->{k} eq $k
+               or die "[error] k should be $k, but is $attr->{k} in $f\n";
+             $attr->{dataset} eq $dataset
+               or die
+               "[error] dataset should be $dataset, but is $attr->{dataset} in $f\n";
+             $attr->{distance} eq $distance
+               or die
+               "[error] distance should be $distance, but is $attr->{distance} in $f\n";
+         }
+         else {
+             ($k, $dataset, $distance) = @{$attr}{qw(k dataset distance)};
+         }
+
+         # shorten the batch size for display: trailing 000000 -> M, 000 -> K
+         my $batch_size = $attr->{batch_size};
+         for ($batch_size) {
+             s/000000$/M/;
+             s/000$/K/;
+         }
+
+         # strip the outer braces and quotes, and turn commas into spaces
+         my $search_param = $attr->{search_param};
+         for ($search_param) {
+             s/^{//;
+             s/}$//;
+             s/,/ /g;
+             s/"//g;
+         }
+
+         my $qps;
+         if ($metric eq $QPS) {
+             $qps = $attr->{batch_size} / $attr->{average_search_time};
+         }
+         elsif ($metric eq $AVG_LATENCY) {
+             $qps = $attr->{average_search_time};
+         }
+         elsif ($metric eq $P99_LATENCY) {
+             die "[error] p99_search_time is not found\n"
+               unless exists $attr->{p99_search_time};
+             $qps = $attr->{p99_search_time};
+         }
+         elsif ($metric eq $P999_LATENCY) {
+             die "[error] p999_search_time is not found\n"
+               unless exists $attr->{p999_search_time};
+             $qps = $attr->{p999_search_time};
+         }
+         else {
+             die "[error] unknown latency type: '$metric'\n";
+         }
+
+         my %result = (
+             file         => $f,
+             name         => "$attr->{name}-batch${batch_size}",
+             search_param => $search_param,
+             qps          => $qps,
+         );
+         $result{recall} = $attr->{recall} if exists $attr->{recall};
+         push @results, \%result;
+     }
+     return $k, $dataset, $distance, \@results;
+ }
+
+ # Read a binary matrix file: an 8-byte header holding two native 32-bit
+ # unsigned integers (number of rows, row length) followed by rows * length
+ # native 32-bit signed integers.
+ #
+ # Returns (\@values, $nrows, $dim) with the values in file order. Dies when
+ # the header cannot be read, when the file size does not match the header, or
+ # when the payload is short.
+ sub read_ibin {
+     my ($fname) = @_;
+
+     open my $fh, '<:raw', $fname;
+
+     # read() returns undef on error and a short count at EOF; check both so a
+     # truncated file is reported here rather than as an unpack warning
+     my $header;
+     my $got = read($fh, $header, 8);
+     defined $got && $got == 8
+       or die "[error] read $fname failed\n";
+     my ($nrows, $dim) = unpack('LL', $header);
+
+     my $payload_size = $nrows * $dim * 4;
+     my $expected_size = 8 + $payload_size;
+     my $size = (stat($fh))[7];
+     $size == $expected_size
+       or die(
+         "[error] expected size is $expected_size, but actual size is $size\n");
+
+     my $raw = '';
+     $got = read($fh, $raw, $payload_size);
+     defined $got && $got == $payload_size
+       or die "[error] read $fname failed\n";
+     close $fh;
+
+     # 'l*' consumes all values without building a template of $nrows * $dim
+     # characters first
+     my @data = unpack('l*', $raw);
+     return \@data, $nrows, $dim;
+ }
+
+ # Build, for each of the $nrows rows of the flat array @$neighbors (row length
+ # $ncols), a hash whose keys are the first $k entries of that row.
+ # Returns a reference to the array of those hash references.
+ sub pick_k_neighbors {
+     my ($neighbors, $nrows, $ncols, $k) = @_;
+
+     my @sets;
+     my $row_start = 0;
+     for (1 .. $nrows) {
+         my %neighbor_set;
+         # hash-slice assignment: mark the first $k ids of this row
+         @neighbor_set{ @{$neighbors}[$row_start .. $row_start + $k - 1] } =
+           (1) x $k;
+         push @sets, \%neighbor_set;
+         $row_start += $ncols;
+     }
+     return \@sets;
+ }
+
+
+ # Fraction of returned neighbors that appear in the ground truth.
+ #
+ # $truth_k_neighbors is an array of hashes (one per row, keyed by neighbor id)
+ # and $result_neighbors a flat array with $k ids per row. The number of hits
+ # summed over all rows is divided by $k and by $nrows.
+ sub calc_recall {
+     my ($truth_k_neighbors, $result_neighbors, $nrows, $k) = @_;
+
+     my $hits = 0;
+     for my $row (0 .. $nrows - 1) {
+         my $first = $row * $k;
+         # grep in numeric context yields the number of matching ids
+         $hits += grep { exists $truth_k_neighbors->[$row]{$_} }
+           @{$result_neighbors}[$first .. $first + $k - 1];
+     }
+     return $hits / $k / $nrows;
+ }
+
+ # Compute recall for every result that lacks it (or for all of them when
+ # $force_calc_recall is set), store it in the record and persist it in the
+ # result's text file.
+ #
+ # The neighbors of a result are read from the file named like the result file
+ # with the trailing "txt" replaced by "ibin". Uses the file-level $k. Dies
+ # when the ground truth has fewer than $k columns or when a result file's
+ # shape does not match.
+ sub calc_missing_recall {
+     my ($results, $truth_file, $force_calc_recall) = @_;
+
+     my $need_calc_recall = grep { !exists $_->{recall} } @$results;
+     return if !$need_calc_recall && !$force_calc_recall;
+
+     my ($truth_neighbors, $nrows, $truth_k) = read_ibin($truth_file);
+     die "[error] ground truth k ($truth_k) < k($k)\n"
+       if $truth_k < $k;
+     my $truth_k_neighbors =
+       pick_k_neighbors($truth_neighbors, $nrows, $truth_k, $k);
+
+     for my $result (@$results) {
+         my $has_recall = exists $result->{recall};
+         next if $has_recall && !$force_calc_recall;
+
+         (my $result_bin_file = $result->{file}) =~ s/txt$/ibin/;
+         print STDERR "calculating recall for $result_bin_file ...\n";
+         my ($result_neighbors, $result_nrows, $result_k) =
+           read_ibin($result_bin_file);
+         die "[error] k should be $k, but is $result_k in $result_bin_file\n"
+           if $result_k != $k;
+         die
+           "[error] #row should be $nrows, but is $result_nrows in $result_bin_file\n"
+           if $result_nrows != $nrows;
+
+         my $recall =
+           calc_recall($truth_k_neighbors, $result_neighbors, $nrows, $k);
+         if (!$has_recall) {
+             append_recall_to_result($result->{file}, $recall);
+         }
+         else {
+             # rewrite the file only when the stored text actually changes
+             my $new_value = sprintf("%f", $recall);
+             if ($result->{recall} ne $new_value) {
+                 print "update recall: $result->{recall} -> $new_value\n";
+                 overwrite_recall_to_result($result->{file}, $recall);
+             }
+         }
+         $result->{recall} = $recall;
+     }
+ }
+
+
+ # For every result name, estimate the dependent quantity at $value of the
+ # conditioning quantity by linear interpolation between measured points.
+ #
+ # With $condition eq 'recall' the points are (recall, qps); otherwise they are
+ # (qps, recall). Returns (\@names, \@estimates) in sorted name order; an
+ # estimate is -1 when $value lies outside the measured range of that name.
+ sub estimate {
+     my ($results, $condition, $value) = @_;
+
+     my @axes = ($condition eq 'recall') ? qw(recall qps) : qw(qps recall);
+     my %point_of;
+     for my $result (@$results) {
+         push @{ $point_of{ $result->{name} } }, [ @{$result}{@axes} ];
+     }
+
+     my @names = sort keys %point_of;
+     my @result;
+   NAME:
+     for my $name (@names) {
+         my @points = sort { $a->[0] <=> $b->[0] } @{ $point_of{$name} };
+
+         if ($value < $points[0][0] || $value > $points[$#points][0]) {
+             push @result, -1;
+             next NAME;
+         }
+         if ($value == $points[0][0]) {
+             push @result, $points[0][1];
+             next NAME;
+         }
+
+         # interpolate on the first segment whose right end reaches $value
+         for my $i (1 .. $#points) {
+             next unless $points[$i][0] >= $value;
+             push @result,
+               linear_interpolation($value, @{ $points[$i - 1] },
+                 @{ $points[$i] });
+             last;
+         }
+     }
+     return \@names, \@result;
+ }
+
+ # Value at $x of the straight line through ($x1, $y1) and ($x2, $y2).
+ sub linear_interpolation {
+     my ($x, $x1, $y1, $x2, $y2) = @_;
+     my $offset = $x - $x1;
+     my $rise   = $y2 - $y1;
+     my $run    = $x2 - $x1;
+     # multiply before dividing, as a single left-to-right expression would
+     return $y1 + $offset * $rise / $run;
+ }
+
+ # Append $new->[$i] * $scale to the row $all->[$i] for every index.
+ # Dies when the two arrays differ in length.
+ sub merge {
+     my ($all, $new, $scale) = @_;
+     die "[error] length is not equal\n"
+       if @$all != @$new;
+     for my $i (0 .. $#{$all}) {
+         my $row = $all->[$i];
+         push @$row, $new->[$i] * $scale;
+     }
+ }
+
+ # Print one table row per result name with an estimate for each applicable
+ # condition.
+ #
+ # A condition [target, value] applies when its target is 'recall' or equals
+ # $metric; other targets must be one of the known metric labels (otherwise the
+ # function dies) and are ignored. Estimates are multiplied by 1000 unless the
+ # metric is QPS.
+ sub calc_and_print_estimation {
+     my ($results, $metric, $conditions) = @_;
+
+     my @conditions;
+     for my $candidate (@$conditions) {
+         my $target = $candidate->[0];
+         if ($target eq 'recall' || $target eq $metric) {
+             push @conditions, $candidate;
+             next;
+         }
+         $target eq $QPS
+           || $target eq $AVG_LATENCY
+           || $target eq $P99_LATENCY
+           || $target eq $P999_LATENCY
+           or die "[error] unknown condition: '$target'\n";
+     }
+
+     # every kept condition is either 'recall' or the selected metric
+     my @headers = map {
+         ($_->[0] eq 'recall')
+           ? $metric . '@recall' . $_->[1]
+           : 'recall@' . $metric . $_->[1]
+     } @conditions;
+
+     my $scale = ($metric eq $QPS) ? 1 : 1000;
+     my $estimations;
+     for my $condition (@conditions) {
+         my ($names, $estimate) = estimate($results, @$condition);
+         # the first condition creates one row per name, starting with the name
+         $estimations = [ map { [$_] } @$names ] unless defined $estimations;
+         merge($estimations, $estimate, $scale);
+     }
+
+     my $fmt = "%-60s" . (" %16s" x @headers) . "\n";
+     printf($fmt, '', @headers);
+     $fmt =~ s/16s/16.4f/g;
+     printf($fmt, @$_) for @$estimations;
+ }
diff --git a/cpp/bench/ann/scripts/fbin_to_f16bin.py b/cpp/bench/ann/scripts/fbin_to_f16bin.py
new file mode 100755
index 0000000000..4ea8988d87
--- /dev/null
+++ b/cpp/bench/ann/scripts/fbin_to_f16bin.py
@@ -0,0 +1,46 @@
+# =============================================================================
+# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import sys
+import numpy as np
+
+
+def read_fbin(fname):
+ shape = np.fromfile(fname, dtype=np.uint32, count=2)
+ if float(shape[0]) * shape[1] * 4 > 2000000000:
+ data = np.memmap(fname, dtype=np.float32, offset=8, mode="r").reshape(
+ shape
+ )
+ else:
+ data = np.fromfile(fname, dtype=np.float32, offset=8).reshape(shape)
+ return data
+
+
+def write_bin(fname, data):
+ with open(fname, "wb") as f:
+ np.asarray(data.shape, dtype=np.uint32).tofile(f)
+ data.tofile(f)
+
+
+if len(sys.argv) != 3:
+ print(
+ "usage: %s input.fbin output.f16bin" % (sys.argv[0]),
+ file=sys.stderr,
+ )
+ sys.exit(-1)
+
+data = read_fbin(sys.argv[1]).astype(np.float16)
+write_bin(sys.argv[2], data)
diff --git a/cpp/bench/ann/scripts/hdf5_to_fbin.py b/cpp/bench/ann/scripts/hdf5_to_fbin.py
new file mode 100755
index 0000000000..cfeb184ea8
--- /dev/null
+++ b/cpp/bench/ann/scripts/hdf5_to_fbin.py
@@ -0,0 +1,85 @@
+# =============================================================================
+# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+
+import sys
+import numpy as np
+import h5py
+
+
+def normalize(x):
+ norm = np.linalg.norm(x, axis=1)
+ return (x.T / norm).T
+
+
+def write_bin(fname, data):
+ with open(fname, "wb") as f:
+ np.asarray(data.shape, dtype=np.uint32).tofile(f)
+ data.tofile(f)
+
+
+if __name__ == "__main__":
+ if len(sys.argv) != 2 and len(sys.argv) != 3:
+ print(
+ "usage: %s [-n] .hdf5\n" % (sys.argv[0]),
+ " -n: normalize base/query set\n",
+ "outputs: .base.fbin\n",
+ " .query.fbin\n",
+ " .groundtruth.neighbors.ibin\n",
+ " .groundtruth.distances.fbin",
+ file=sys.stderr,
+ )
+ sys.exit(-1)
+
+ need_normalize = False
+ if len(sys.argv) == 3:
+ assert sys.argv[1] == "-n"
+ need_normalize = True
+ fname_prefix = sys.argv[-1]
+ assert fname_prefix.endswith(".hdf5")
+ fname_prefix = fname_prefix[:-5]
+
+ hdf5 = h5py.File(sys.argv[-1], "r")
+ assert (
+ hdf5.attrs["distance"] == "angular"
+ or hdf5.attrs["distance"] == "euclidean"
+ )
+ assert hdf5["train"].dtype == np.float32
+ assert hdf5["test"].dtype == np.float32
+ assert hdf5["neighbors"].dtype == np.int32
+ assert hdf5["distances"].dtype == np.float32
+
+ base = hdf5["train"][:]
+ query = hdf5["test"][:]
+ if need_normalize:
+ base = normalize(base)
+ query = normalize(query)
+ elif hdf5.attrs["distance"] == "angular":
+ print(
+ "warning: input has angular distance, specify -n to normalize base/query set!\n"
+ )
+
+ output_fname = fname_prefix + ".base.fbin"
+ print("writing", output_fname, "...")
+ write_bin(output_fname, base)
+
+ output_fname = fname_prefix + ".query.fbin"
+ print("writing", output_fname, "...")
+ write_bin(output_fname, query)
+
+ output_fname = fname_prefix + ".groundtruth.neighbors.ibin"
+ print("writing", output_fname, "...")
+ write_bin(output_fname, hdf5["neighbors"][:])
+
+ output_fname = fname_prefix + ".groundtruth.distances.fbin"
+ print("writing", output_fname, "...")
+ write_bin(output_fname, hdf5["distances"][:])
diff --git a/cpp/bench/ann/scripts/split_groundtruth.pl b/cpp/bench/ann/scripts/split_groundtruth.pl
new file mode 100755
index 0000000000..b0a59f806c
--- /dev/null
+++ b/cpp/bench/ann/scripts/split_groundtruth.pl
@@ -0,0 +1,45 @@
+#!/usr/bin/perl
+
+# =============================================================================
+# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+
+use warnings;
+use strict;
+use autodie qw(open close);
+
+
+ # Split a combined ground-truth file into two files.
+ #
+ # Input layout: an 8-byte header (two native 32-bit unsigned integers: number
+ # of rows and row length) followed by two blocks of rows * length 4-byte
+ # values. The first block is written to "<output_prefix>.neighbors.ibin" and
+ # the second to "<output_prefix>.distances.fbin", each preceded by a copy of
+ # the header.
+ @ARGV == 2
+   or die "usage: $0 input output_prefix\n";
+ my ($input, $output_prefix) = @ARGV;
+
+ open my $fh, '<:raw', $input;
+
+ # read() returns undef on error and a short count at EOF; check both
+ my $header;
+ my $got = read($fh, $header, 8);
+ defined $got && $got == 8
+   or die "error: failed to read the header of $input\n";
+ my ($nrows, $dim) = unpack('LL', $header);
+
+ my $block_size = $nrows * $dim * 4;
+ my $expected_size = 8 + 2 * $block_size;
+ my $size = (stat($fh))[7];
+ $size == $expected_size
+   or die("error: expected size is $expected_size, but actual size is $size\n");
+
+ # Blocks are stored in this order in the input; handle one at a time so only
+ # a single block is held in memory.
+ for my $suffix ('neighbors.ibin', 'distances.fbin') {
+     my $block = '';
+     $got = read($fh, $block, $block_size);
+     defined $got && $got == $block_size
+       or die "error: failed to read $block_size bytes from $input\n";
+
+     open my $fh_out, '>:raw', "$output_prefix.$suffix";
+     print {$fh_out} $header, $block;
+     # explicit close so that autodie reports a failed write/flush
+     close $fh_out;
+ }
+ close $fh;
diff --git a/cpp/bench/ann/src/common/ann_types.hpp b/cpp/bench/ann/src/common/ann_types.hpp
new file mode 100644
index 0000000000..8f73896e07
--- /dev/null
+++ b/cpp/bench/ann/src/common/ann_types.hpp
@@ -0,0 +1,88 @@
+
+
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+#include
+#include
+
+#include
+
+ namespace raft::bench::ann {
+
+ // Distance/similarity measure an index is built for.
+ enum class Metric {
+   kInnerProduct,
+   kEuclidean,
+ };
+
+ // Kind of memory a buffer lives in; reported per algorithm via AlgoProperty.
+ // NOTE(review): presumably host memory, memory-mapped host memory and GPU
+ // device memory respectively -- confirm against the code that consumes it.
+ enum class MemoryType {
+   Host,
+   HostMmap,
+   Device,
+ };
+
+ // Requirements an algorithm reports through ANN::get_property().
+ struct AlgoProperty {
+   MemoryType dataset_memory_type;
+   // neighbors/distances should have same memory type as queries
+   MemoryType query_memory_type;
+   // true when the algorithm needs set_search_dataset() to be called before
+   // searching (see ANN::set_search_dataset)
+   bool need_dataset_when_search;
+ };
+
+ // Abstract interface of a benchmarked nearest-neighbor algorithm.
+ // T is the element type of the dataset and query vectors.
+ template <typename T>
+ class ANN {
+  public:
+   // Base class of algorithm-specific search parameters; the virtual destructor
+   // makes it usable polymorphically through set_search_param().
+   struct AnnSearchParam {
+     virtual ~AnnSearchParam() = default;
+   };
+
+   ANN(Metric metric, int dim) : metric_(metric), dim_(dim) {}
+   virtual ~ANN() = default;
+
+   // Builds the index from `nrow` vectors stored at `dataset`.
+   virtual void build(const T* dataset, size_t nrow, cudaStream_t stream = 0) = 0;
+
+   // Sets the parameters used by subsequent search() calls.
+   virtual void set_search_param(const AnnSearchParam& param) = 0;
+   // TODO: this assumes that an algorithm can always return k results.
+   // This is not always possible.
+   virtual void search(const T* queries,
+                       int batch_size,
+                       int k,
+                       size_t* neighbors,
+                       float* distances,
+                       cudaStream_t stream = 0) const = 0;
+
+   // Stores the index to / restores it from `file`.
+   virtual void save(const std::string& file) const = 0;
+   virtual void load(const std::string& file)       = 0;
+
+   virtual AlgoProperty get_property() const = 0;
+
+   // Some algorithms don't save the building dataset in their indices.
+   // So they should be given the access to that dataset during searching.
+   // The advantage of this way is that index has smaller size
+   // and many indices can share one dataset.
+   //
+   // AlgoProperty::need_dataset_when_search of such algorithm should be true,
+   // and set_search_dataset() should save the passed-in pointer somewhere.
+   // The client code should call set_search_dataset() before searching,
+   // and should not release dataset before searching is finished.
+   //
+   // The default implementation does nothing.
+   virtual void set_search_dataset(const T* /*dataset*/, size_t /*nrow*/) {}
+
+  protected:
+   Metric metric_;
+   int dim_;
+ };
+
+ }  // namespace raft::bench::ann
diff --git a/cpp/bench/ann/src/common/benchmark.hpp b/cpp/bench/ann/src/common/benchmark.hpp
new file mode 100644
index 0000000000..b4d8fbeee3
--- /dev/null
+++ b/cpp/bench/ann/src/common/benchmark.hpp
@@ -0,0 +1,591 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifdef NVTX
+#include
+#endif
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "benchmark_util.hpp"
+#include "conf.h"
+#include "dataset.h"
+#include "util.h"
+
+using std::cerr;
+using std::cout;
+using std::endl;
+using std::string;
+using std::to_string;
+using std::unordered_set;
+using std::vector;
+
+namespace raft::bench::ann {
+
+// Checks every path in `files` with file_exists(). Each distinct path is probed
+// and reported at most once. Returns false if at least one path failed the check.
+inline bool check_file_exist(const std::vector& files)
+{
+  std::unordered_set seen;
+  bool all_present = true;
+  for (const auto& path : files) {
+    // insert().second is true only the first time a path is encountered
+    const bool first_occurrence = seen.insert(path).second;
+    if (!first_occurrence) { continue; }
+    if (file_exists(path)) { continue; }
+
+    log_error("file '%s' doesn't exist or is not a regular file", path.c_str());
+    all_present = false;
+  }
+  return all_present;
+}
+
+// Checks that none of the output paths in `files` exists yet. An existing path is
+// only a warning when `force_overwrite` is set; otherwise it is an error and the
+// function returns false (after all paths have been examined).
+inline bool check_file_not_exist(const std::vector& files, bool force_overwrite)
+{
+  bool ok = true;
+  for (const auto& path : files) {
+    if (!file_exists(path)) { continue; }
+
+    if (!force_overwrite) {
+      log_error("'%s' already exists, use '-f' to force overwriting", path.c_str());
+      ok = false;
+      continue;
+    }
+    log_warn("'%s' already exists, will overwrite it", path.c_str());
+  }
+  return ok;
+}
+
+// Checks that no path appears more than once in `files`. Every repeated occurrence
+// is reported; returns false if any duplicate was found.
+inline bool check_no_duplicate_file(const std::vector& files)
+{
+  std::unordered_set seen;
+  bool unique = true;
+  for (const auto& path : files) {
+    // insert() refuses a path that is already in the set
+    const bool newly_added = seen.insert(path).second;
+    if (newly_added) { continue; }
+
+    log_error("'%s' occurs more than once as output file, would be overwritten", path.c_str());
+    unique = false;
+  }
+  return unique;
+}
+
+// Creates every directory in `dirs` that does not exist yet; each distinct entry is
+// handled once. Stops at the first failure and returns false, otherwise returns true.
+inline bool mkdir(const std::vector& dirs)
+{
+  std::unordered_set handled;
+  for (const auto& path : dirs) {
+    // skip repeated entries and directories that are already there
+    if (!handled.insert(path).second || dir_exists(path)) { continue; }
+
+    if (!create_dir(path)) {
+      log_error("fail to create output directory '%s'", path.c_str());
+      // won't create any other dir when problem occurs
+      return false;
+    }
+    log_info("mkdir '%s'", path.c_str());
+  }
+  return true;
+}
+
+// Validates the files touched by a build or search run before any work starts.
+//
+// Build mode: the index file and its ".txt" companion are outputs.
+// Search mode: the index file and its ".txt" companion must exist; the outputs are
+// "<search_result_file>.0.ibin" and "<search_result_file>.0.txt".
+// Outputs must not exist (unless `force_overwrite`) and must not repeat. Only when
+// all checks pass are the parent directories of the outputs created.
+inline bool check(const std::vector& indices,
+                  bool build_mode,
+                  bool force_overwrite)
+{
+  std::vector required_files;
+  std::vector required_dirs;
+  std::vector outputs;
+
+  // remembers the directory part of `path`, if it has one
+  const auto note_parent_dir = [&required_dirs](const std::string& path) {
+    const auto slash = path.rfind('/');
+    if (slash != std::string::npos) { required_dirs.push_back(path.substr(0, slash)); }
+  };
+
+  for (const auto& index : indices) {
+    const std::string index_info = index.file + ".txt";
+    if (build_mode) {
+      outputs.push_back(index.file);
+      outputs.push_back(index_info);
+      note_parent_dir(index.file);
+      continue;
+    }
+
+    required_files.push_back(index.file);
+    required_files.push_back(index_info);
+
+    outputs.push_back(index.search_result_file + ".0.ibin");
+    outputs.push_back(index.search_result_file + ".0.txt");
+    note_parent_dir(index.search_result_file);
+  }
+
+  // run all three checks unconditionally so that every problem gets reported
+  const bool inputs_present = check_file_exist(required_files);
+  const bool outputs_free   = check_file_not_exist(outputs, force_overwrite);
+  const bool outputs_unique = check_no_duplicate_file(outputs);
+  if (!(inputs_present && outputs_free && outputs_unique)) { return false; }
+
+  return mkdir(required_dirs);
+}
+
+// Writes the textual description of a built index to "<file_prefix>.txt".
+// Throws std::runtime_error if the file cannot be opened or written.
+inline void write_build_info(const std::string& file_prefix,
+                             const std::string& dataset,
+                             const std::string& distance,
+                             const std::string& name,
+                             const std::string& algo,
+                             const std::string& build_param,
+                             float build_time)
+{
+  const std::string info_path = file_prefix + ".txt";
+
+  std::ofstream out(info_path);
+  if (!out) { throw std::runtime_error("can't open build info file: " + info_path); }
+
+  out << "dataset: " << dataset << "\n";
+  out << "distance: " << distance << "\n";
+  out << "\n";
+  out << "name: " << name << "\n";
+  out << "algo: " << algo << "\n";
+  out << "build_param: " << build_param << "\n";
+  out << "build_time: " << build_time << endl;
+
+  // closing flushes the stream; a failed flush shows up in the stream state
+  out.close();
+  if (!out) { throw std::runtime_error("can't write to build info file: " + info_path); }
+}
+
+// Builds and saves every index in `indices` from the base set of `dataset`.
+//
+// For each index the algorithm is created, the base set is fetched in the memory
+// type the algorithm asks for, the build is timed (stream synchronized before and
+// after), and the index plus its ".txt" build info are written to index.file.
+template
+void build(const Dataset* dataset, const std::vector& indices)
+{
+  cudaStream_t stream;
+  RAFT_CUDA_TRY(cudaStreamCreate(&stream));
+
+  log_info(
+    "base set from dataset '%s', #vector = %zu", dataset->name().c_str(), dataset->base_set_size());
+
+  for (const auto& index : indices) {
+    log_info("creating algo '%s', param=%s", index.algo.c_str(), index.build_param.dump().c_str());
+    auto algo = create_algo(index.algo,
+                            dataset->distance(),
+                            dataset->dim(),
+                            index.refine_ratio,
+                            index.build_param,
+                            index.dev_list);
+
+    // fetch the base set in the memory type requested by the algorithm
+    const T* base_vectors = nullptr;
+    switch (algo->get_property().dataset_memory_type) {
+      case MemoryType::Host:
+        log_info("%s", "loading base set to memory");
+        base_vectors = dataset->base_set();
+        break;
+      case MemoryType::HostMmap:
+        log_info("%s", "mapping base set to memory");
+        base_vectors = dataset->mapped_base_set();
+        break;
+      case MemoryType::Device:
+        log_info("%s", "loading base set to GPU");
+        base_vectors = dataset->base_set_on_gpu();
+        break;
+    }
+
+    log_info("building index '%s'", index.name.c_str());
+    // drain the stream so that pending work is not attributed to the build
+    RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
+#ifdef NVTX
+    nvtxRangePush("build");
+#endif
+    Timer timer;
+    algo->build(base_vectors, dataset->base_set_size(), stream);
+    RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
+    float elapsed_ms = timer.elapsed_ms();
+#ifdef NVTX
+    nvtxRangePop();
+#endif
+    const float build_seconds = elapsed_ms / 1000.0f;
+    log_info("built index in %.2f seconds", build_seconds);
+    RAFT_CUDA_TRY(cudaDeviceSynchronize());
+    RAFT_CUDA_TRY(cudaPeekAtLastError());
+
+    algo->save(index.file);
+    write_build_info(index.file,
+                     dataset->name(),
+                     dataset->distance(),
+                     index.name,
+                     index.algo,
+                     index.build_param.dump(),
+                     build_seconds);
+    log_info("saved index to %s", index.file.c_str());
+  }
+
+  RAFT_CUDA_TRY(cudaStreamDestroy(stream));
+}
+
+// Writes the outcome of one search configuration:
+//  - "<file_prefix>.txt": parameters and timings. The p99 / p999 lines are left out
+//    when the corresponding value equals std::numeric_limits::max().
+//  - "<file_prefix>.ibin": the neighbor ids, query_set_size rows of k entries.
+// Throws std::runtime_error if the text file cannot be opened or written.
+inline void write_search_result(const std::string& file_prefix,
+                                const std::string& dataset,
+                                const std::string& distance,
+                                const std::string& name,
+                                const std::string& algo,
+                                const std::string& build_param,
+                                const std::string& search_param,
+                                int batch_size,
+                                int run_count,
+                                int k,
+                                float search_time_average,
+                                float search_time_p99,
+                                float search_time_p999,
+                                const int* neighbors,
+                                size_t query_set_size)
+{
+  const std::string report_path = file_prefix + ".txt";
+
+  std::ofstream out(report_path);
+  if (!out) { throw std::runtime_error("can't open search result file: " + report_path); }
+
+  out << "dataset: " << dataset << "\n";
+  out << "distance: " << distance << "\n";
+  out << "\n";
+  out << "name: " << name << "\n";
+  out << "algo: " << algo << "\n";
+  out << "build_param: " << build_param << "\n";
+  out << "search_param: " << search_param << "\n";
+  out << "\n";
+  out << "batch_size: " << batch_size << "\n";
+  out << "run_count: " << run_count << "\n";
+  out << "k: " << k << "\n";
+  out << "average_search_time: " << search_time_average << endl;
+
+  // percentiles are optional: the maximum value marks "not measured"
+  const bool has_p99  = search_time_p99 != std::numeric_limits::max();
+  const bool has_p999 = search_time_p999 != std::numeric_limits::max();
+  if (has_p99) { out << "p99_search_time: " << search_time_p99 << endl; }
+  if (has_p999) { out << "p999_search_time: " << search_time_p999 << endl; }
+
+  out.close();
+  if (!out) { throw std::runtime_error("can't write to search result file: " + report_path); }
+
+  BinFile neighbors_file(file_prefix + ".ibin", "w");
+  neighbors_file.write(neighbors, query_set_size, k);
+}
+
+/**
+ * Runs the search benchmark for every index in `indices` on the query set of
+ * `dataset`.
+ *
+ * For each index the algorithm is created and loaded from index.file. For each of
+ * its search parameter sets the whole query set is searched `run_count` times in
+ * batches of `batch_size`; every batch is timed with the stream synchronized before
+ * and after. The best average (and, with enough samples, p99 / p999) batch time over
+ * the runs is written together with the neighbors of the last run to
+ * "<search_result_file>.<param index>.{txt,ibin}".
+ *
+ * batch_size, k and run_count are taken from the first index.
+ */
+template
+inline void search(const Dataset* dataset, const std::vector& indices)
+{
+  if (indices.empty()) { return; }
+  cudaStream_t stream;
+  RAFT_CUDA_TRY(cudaStreamCreate(&stream));
+
+  log_info("loading query set from dataset '%s', #vector = %zu",
+           dataset->name().c_str(),
+           dataset->query_set_size());
+  const T* query_set = dataset->query_set();
+  // query set is usually much smaller than base set, so load it eagerly
+  const T* d_query_set  = dataset->query_set_on_gpu();
+  size_t query_set_size = dataset->query_set_size();
+
+  // currently all indices has same batch_size, k and run_count
+  const int batch_size = indices[0].batch_size;
+  const int k          = indices[0].k;
+  const int run_count  = indices[0].run_count;
+  log_info(
+    "basic search parameters: batch_size = %d, k = %d, run_count = %d", batch_size, k, run_count);
+  if (query_set_size % batch_size != 0) {
+    // "%%" yields a literal percent sign; a bare "% b" would be parsed as a
+    // conversion specification and corrupt the output of the following arguments
+    log_warn("query set size (%zu) %% batch size (%d) != 0, the size of last batch is %zu",
+             query_set_size,
+             batch_size,
+             query_set_size % batch_size);
+  }
+  const size_t num_batches = (query_set_size - 1) / batch_size + 1;
+  // host result buffers; neighbors_buf holds the ids narrowed to int for the output file
+  std::size_t* neighbors = new std::size_t[query_set_size * k];
+  int* neighbors_buf     = new int[query_set_size * k];
+  float* distances       = new float[query_set_size * k];
+  std::vector search_times;
+  search_times.reserve(num_batches);
+  // device result buffers, used when the algorithm wants queries in device memory
+  std::size_t* d_neighbors;
+  float* d_distances;
+  RAFT_CUDA_TRY(cudaMalloc((void**)&d_neighbors, query_set_size * k * sizeof(*d_neighbors)));
+  RAFT_CUDA_TRY(cudaMalloc((void**)&d_distances, query_set_size * k * sizeof(*d_distances)));
+
+  for (const auto& index : indices) {
+    log_info("creating algo '%s', param=%s", index.algo.c_str(), index.build_param.dump().c_str());
+    auto algo = create_algo(index.algo,
+                            dataset->distance(),
+                            dataset->dim(),
+                            index.refine_ratio,
+                            index.build_param,
+                            index.dev_list);
+    auto algo_property = algo->get_property();
+
+    log_info("loading index '%s' from file '%s'", index.name.c_str(), index.file.c_str());
+    algo->load(index.file);
+
+    // queries and results live in the memory type requested by the algorithm
+    const T* this_query_set     = query_set;
+    std::size_t* this_neighbors = neighbors;
+    float* this_distances       = distances;
+    if (algo_property.query_memory_type == MemoryType::Device) {
+      this_query_set = d_query_set;
+      this_neighbors = d_neighbors;
+      this_distances = d_distances;
+    }
+
+    if (algo_property.need_dataset_when_search) {
+      log_info("loading base set from dataset '%s', #vector = %zu",
+               dataset->name().c_str(),
+               dataset->base_set_size());
+      const T* base_set_ptr = nullptr;
+      if (algo_property.dataset_memory_type == MemoryType::Host) {
+        log_info("%s", "loading base set to memory");
+        base_set_ptr = dataset->base_set();
+      } else if (algo_property.dataset_memory_type == MemoryType::HostMmap) {
+        log_info("%s", "mapping base set to memory");
+        base_set_ptr = dataset->mapped_base_set();
+      } else if (algo_property.dataset_memory_type == MemoryType::Device) {
+        log_info("%s", "loading base set to GPU");
+        base_set_ptr = dataset->base_set_on_gpu();
+      }
+      algo->set_search_dataset(base_set_ptr, dataset->base_set_size());
+    }
+
+    for (int i = 0, end_i = index.search_params.size(); i != end_i; ++i) {
+      auto p_param = create_search_param(index.algo, index.search_params[i]);
+      algo->set_search_param(*p_param);
+      log_info("search with param: %s", index.search_params[i].dump().c_str());
+
+      // start every parameter set from zeroed result buffers
+      if (algo_property.query_memory_type == MemoryType::Device) {
+        RAFT_CUDA_TRY(cudaMemset(d_neighbors, 0, query_set_size * k * sizeof(*d_neighbors)));
+        RAFT_CUDA_TRY(cudaMemset(d_distances, 0, query_set_size * k * sizeof(*d_distances)));
+      } else {
+        memset(neighbors, 0, query_set_size * k * sizeof(*neighbors));
+        memset(distances, 0, query_set_size * k * sizeof(*distances));
+      }
+
+      // the maximum value marks "not measured" for write_search_result()
+      float best_search_time_average = std::numeric_limits::max();
+      float best_search_time_p99     = std::numeric_limits::max();
+      float best_search_time_p999    = std::numeric_limits::max();
+      for (int run = 0; run < run_count; ++run) {
+        log_info("run %d / %d", run + 1, run_count);
+        for (std::size_t batch_id = 0; batch_id < num_batches; ++batch_id) {
+          std::size_t row       = batch_id * batch_size;
+          int actual_batch_size = (batch_id == num_batches - 1) ? query_set_size - row : batch_size;
+          RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
+#ifdef NVTX
+          string nvtx_label = "batch" + to_string(batch_id);
+          if (run_count != 1) { nvtx_label = "run" + to_string(run) + "-" + nvtx_label; }
+          // NOTE(review): in NVTX builds only the first 10 batches of the first run
+          // are searched; the remaining batches and runs are skipped.
+          if (batch_id == 10) {
+            run = run_count - 1;
+            break;
+          }
+#endif
+          Timer timer;
+#ifdef NVTX
+          nvtxRangePush(nvtx_label.c_str());
+#endif
+          algo->search(this_query_set + row * dataset->dim(),
+                       actual_batch_size,
+                       k,
+                       this_neighbors + row * k,
+                       this_distances + row * k,
+                       stream);
+          RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
+          float elapsed_ms = timer.elapsed_ms();
+#ifdef NVTX
+          nvtxRangePop();
+#endif
+          // If the size of the last batch is less than batch_size, don't count it for
+          // search time. But neighbors of the last batch will still be filled, so it's
+          // counted for recall calculation.
+          if (actual_batch_size == batch_size) {
+            search_times.push_back(elapsed_ms / 1000.0f);  // in seconds
+          }
+        }
+
+        // NOTE(review): search_times is empty when no full batch was run
+        // (query_set_size < batch_size); the division then yields NaN.
+        float search_time_average =
+          std::accumulate(search_times.cbegin(), search_times.cend(), 0.0f) / search_times.size();
+        best_search_time_average = std::min(best_search_time_average, search_time_average);
+
+        // percentiles are only reported with enough samples
+        if (search_times.size() >= 100) {
+          std::sort(search_times.begin(), search_times.end());
+
+          auto calc_percentile_pos = [](float percentile, size_t N) {
+            return static_cast(std::ceil(percentile / 100.0 * N)) - 1;
+          };
+
+          float search_time_p99 = search_times[calc_percentile_pos(99, search_times.size())];
+          best_search_time_p99  = std::min(best_search_time_p99, search_time_p99);
+
+          if (search_times.size() >= 1000) {
+            float search_time_p999 = search_times[calc_percentile_pos(99.9, search_times.size())];
+            best_search_time_p999  = std::min(best_search_time_p999, search_time_p999);
+          }
+        }
+        search_times.clear();
+      }
+      RAFT_CUDA_TRY(cudaDeviceSynchronize());
+      RAFT_CUDA_TRY(cudaPeekAtLastError());
+
+      if (algo_property.query_memory_type == MemoryType::Device) {
+        RAFT_CUDA_TRY(cudaMemcpy(neighbors,
+                                 d_neighbors,
+                                 query_set_size * k * sizeof(*d_neighbors),
+                                 cudaMemcpyDeviceToHost));
+        RAFT_CUDA_TRY(cudaMemcpy(distances,
+                                 d_distances,
+                                 query_set_size * k * sizeof(*d_distances),
+                                 cudaMemcpyDeviceToHost));
+      }
+
+      // the result file stores int ids, so narrow the size_t ids
+      for (size_t j = 0; j < query_set_size * k; ++j) {
+        neighbors_buf[j] = neighbors[j];
+      }
+      write_search_result(index.search_result_file + "." + to_string(i),
+                          dataset->name(),
+                          dataset->distance(),
+                          index.name,
+                          index.algo,
+                          index.build_param.dump(),
+                          index.search_params[i].dump(),
+                          batch_size,
+                          index.run_count,
+                          k,
+                          best_search_time_average,
+                          best_search_time_p99,
+                          best_search_time_p999,
+                          neighbors_buf,
+                          query_set_size);
+    }
+
+    log_info("finish searching for index '%s'", index.name.c_str());
+  }
+
+  delete[] neighbors;
+  delete[] neighbors_buf;
+  delete[] distances;
+  RAFT_CUDA_TRY(cudaFree(d_neighbors));
+  RAFT_CUDA_TRY(cudaFree(d_distances));
+  RAFT_CUDA_TRY(cudaStreamDestroy(stream));
+}
+
+// Returns the command line help text; `argv0` is inserted as the program name.
+inline const std::string usage(const string& argv0)
+{
+  std::string text = "usage: ";
+  text += argv0;
+  text += " -b|s [-c] [-f] [-i index_names] conf.json\n";
+  text += " -b: build mode, will build index\n";
+  text += " -s: search mode, will search using built index\n";
+  text += " one and only one of -b and -s should be specified\n";
+  text += " -c: just check command line options and conf.json are sensible\n";
+  text += " won't build or search\n";
+  text += " -f: force overwriting existing output files\n";
+  text += " -i: by default will build/search all the indices found in conf.json\n";
+  text += " '-i' can be used to select a subset of indices\n";
+  text += " 'index_names' is a list of comma-separated index names\n";
+  text += " '*' is allowed as the last character of a name to select all matched indices\n";
+  text += " for example, -i \"hnsw1,hnsw2,faiss\" or -i \"hnsw*,faiss\"";
+  return text;
+}
+
+// Runs the benchmark for one dataset element type.
+//
+// Opens the dataset described by `conf`, selects the indices matching
+// `index_patterns`, validates the files involved and then builds or searches.
+// With `only_check` the function stops after validation.
+// Returns 0 on success and -1 if validation fails or an exception is thrown.
+template
+inline int dispatch_benchmark(Configuration& conf,
+                              std::string& index_patterns,
+                              bool force_overwrite,
+                              bool only_check,
+                              bool build_mode,
+                              bool search_mode)
+{
+  try {
+    auto dataset_conf = conf.get_dataset_conf();
+
+    BinDataset dataset(dataset_conf.name,
+                       dataset_conf.base_file,
+                       dataset_conf.subset_first_row,
+                       dataset_conf.subset_size,
+                       dataset_conf.query_file,
+                       dataset_conf.distance);
+
+    auto indices = conf.get_indices(index_patterns);
+    if (!check(indices, build_mode, force_overwrite)) { return -1; }
+
+    // announce what is about to be processed
+    std::string summary(build_mode ? "will build:" : "will search:");
+    for (const auto& index : indices) {
+      summary += "\n ";
+      summary += index.name;
+    }
+    log_info("%s", summary.c_str());
+
+    if (only_check) {
+      log_info("%s", "all check passed, quit due to option -c");
+      return 0;
+    }
+
+    if (build_mode) {
+      build(&dataset, indices);
+    } else if (search_mode) {
+      search(&dataset, indices);
+    }
+    return 0;
+  } catch (const std::exception& e) {
+    log_error("exception occurred: %s", e.what());
+    return -1;
+  }
+}
+
+/**
+ * Entry point of the benchmark executable.
+ *
+ * Parses the command line (see usage()), opens and parses the configuration file
+ * and dispatches to dispatch_benchmark() according to the dataset's dtype
+ * ("float", "uint8" or "int8").
+ *
+ * Returns the result of dispatch_benchmark(), or -1 on a usage error, when help is
+ * requested, when the configuration cannot be read or when the dtype is unsupported.
+ */
+inline int run_main(int argc, char** argv)
+{
+  bool force_overwrite = false;
+  bool build_mode      = false;
+  bool search_mode     = false;
+  bool only_check      = false;
+  std::string index_patterns("*");
+
+  int opt;
+  while ((opt = getopt(argc, argv, "bscfi:h")) != -1) {
+    switch (opt) {
+      case 'b': build_mode = true; break;
+      case 's': search_mode = true; break;
+      case 'c': only_check = true; break;
+      case 'f': force_overwrite = true; break;
+      case 'i': index_patterns = optarg; break;
+      case 'h': cout << usage(argv[0]) << endl; return -1;
+      default: cerr << "\n" << usage(argv[0]) << endl; return -1;
+    }
+  }
+  // exactly one of the two modes must be selected
+  if (build_mode == search_mode) {
+    std::cerr << "one and only one of -b and -s should be specified\n\n" << usage(argv[0]) << endl;
+    return -1;
+  }
+  // exactly one positional argument: the configuration file
+  if (argc - optind != 1) {
+    std::cerr << usage(argv[0]) << endl;
+    return -1;
+  }
+  string conf_file = argv[optind];
+
+  std::ifstream conf_stream(conf_file.c_str());
+  if (!conf_stream) {
+    log_error("can't open configuration file: %s", argv[optind]);
+    return -1;
+  }
+
+  try {
+    Configuration conf(conf_stream);
+    std::string dtype = conf.get_dataset_conf().dtype;
+
+    if (dtype == "float") {
+      return dispatch_benchmark(
+        conf, index_patterns, force_overwrite, only_check, build_mode, search_mode);
+    } else if (dtype == "uint8") {
+      return dispatch_benchmark(
+        conf, index_patterns, force_overwrite, only_check, build_mode, search_mode);
+    } else if (dtype == "int8") {
+      return dispatch_benchmark(
+        conf, index_patterns, force_overwrite, only_check, build_mode, search_mode);
+    } else {
+      // "%s" needs a C string; passing the std::string object itself is undefined
+      log_error("datatype %s not supported", dtype.c_str());
+    }
+
+  } catch (const std::exception& e) {
+    log_error("exception occurred: %s", e.what());
+    return -1;
+  }
+
+  return -1;
+}
+}; // namespace raft::bench::ann
diff --git a/cpp/src/distance/distance/specializations/detail/l1_float_float_float_int.cu b/cpp/bench/ann/src/common/benchmark_util.hpp
similarity index 51%
rename from cpp/src/distance/distance/specializations/detail/l1_float_float_float_int.cu
rename to cpp/bench/ann/src/common/benchmark_util.hpp
index 7b45e52ca1..7005883ffc 100644
--- a/cpp/src/distance/distance/specializations/detail/l1_float_float_float_int.cu
+++ b/cpp/bench/ann/src/common/benchmark_util.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -13,26 +13,21 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+#pragma once
-#include
-#include
+#include "ann_types.hpp"
+#include
-namespace raft {
-namespace distance {
-namespace detail {
-template void distance(
- raft::resources const& handle,
- const float* x,
- const float* y,
- float* dist,
- int m,
- int n,
- int k,
- void* workspace,
- std::size_t worksize,
- bool isRowMajor,
- float metric_arg);
+namespace raft::bench::ann {
-} // namespace detail
-} // namespace distance
-} // namespace raft
+// Converts the textual metric name ("inner_product" or "euclidean") to the
+// corresponding Metric value; throws std::runtime_error for any other string.
+inline Metric parse_metric(const std::string& metric_str)
+{
+  if (metric_str == "inner_product") { return Metric::kInnerProduct; }
+  if (metric_str == "euclidean") { return Metric::kEuclidean; }
+
+  throw std::runtime_error("invalid metric: '" + metric_str + "'");
+}
+}; // namespace raft::bench::ann
\ No newline at end of file
diff --git a/cpp/bench/ann/src/common/conf.cpp b/cpp/bench/ann/src/common/conf.cpp
new file mode 100644
index 0000000000..f690f68783
--- /dev/null
+++ b/cpp/bench/ann/src/common/conf.cpp
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "conf.h"
+
+#include
+#include
+#include
+#include
+#include
+
+#include "util.h"
+
+namespace raft::bench::ann {
+using std::runtime_error;
+using std::string;
+using std::unordered_set;
+using std::vector;
+
+// Parses the JSON configuration read from `conf_stream` and fills the dataset
+// and index descriptions from its "dataset", "index" and "search_basic_param"
+// entries.
+Configuration::Configuration(std::istream& conf_stream)
+{
+  // to enable comments in json
+  constexpr bool allow_exceptions = true;
+  constexpr bool ignore_comments  = true;
+  const auto root =
+    nlohmann::json::parse(conf_stream, nullptr, allow_exceptions, ignore_comments);
+
+  parse_dataset_(root.at("dataset"));
+  parse_index_(root.at("index"), root.at("search_basic_param"));
+}
+
+// Returns the configured indices whose name is selected by `patterns` (see
+// match_()), in configuration order. Throws runtime_error if nothing matches.
+vector Configuration::get_indices(const string& patterns) const
+{
+  vector all_names;
+  for (const auto& candidate : indices_) {
+    all_names.push_back(candidate.name);
+  }
+
+  const auto selected = match_(all_names, patterns);
+  if (selected.empty()) { throw runtime_error("no available index matches '" + patterns + "'"); }
+
+  vector result;
+  for (const auto& candidate : indices_) {
+    if (selected.count(candidate.name) == 0) { continue; }
+    result.push_back(candidate);
+  }
+  return result;
+}
+
+// Fills dataset_conf_ from the "dataset" entry of the configuration.
+//
+// "name", "base_file", "query_file" and "distance" are mandatory;
+// "subset_first_row", "subset_size" and "dtype" are optional. Without "dtype" the
+// data type is derived from the suffix of base_file ("fbin" -> float,
+// "u8bin" -> uint8, "i8bin" -> int8). If neither yields a type, an error is logged
+// and dtype stays empty.
+void Configuration::parse_dataset_(const nlohmann::json& conf)
+{
+  dataset_conf_.name       = conf.at("name");
+  dataset_conf_.base_file  = conf.at("base_file");
+  dataset_conf_.query_file = conf.at("query_file");
+  dataset_conf_.distance   = conf.at("distance");
+
+  if (conf.contains("subset_first_row")) {
+    dataset_conf_.subset_first_row = conf.at("subset_first_row");
+  }
+  if (conf.contains("subset_size")) { dataset_conf_.subset_size = conf.at("subset_size"); }
+
+  if (conf.contains("dtype")) {
+    dataset_conf_.dtype = conf.at("dtype");
+    return;
+  }
+
+  const std::string& filename = dataset_conf_.base_file;
+  // The length check keeps `size() - suffix.size()` from wrapping around, which
+  // would make compare() throw std::out_of_range for very short file names.
+  const auto has_suffix = [&filename](const std::string& suffix) {
+    return filename.size() >= suffix.size() &&
+           filename.compare(filename.size() - suffix.size(), suffix.size(), suffix) == 0;
+  };
+
+  if (has_suffix("fbin")) {
+    dataset_conf_.dtype = "float";
+  } else if (has_suffix("u8bin")) {
+    dataset_conf_.dtype = "uint8";
+  } else if (has_suffix("i8bin")) {
+    dataset_conf_.dtype = "int8";
+  } else {
+    log_error("Could not determine data type of the dataset");
+  }
+}
+
+// Appends one Index to indices_ for every element of `index_conf`.
+//
+// batch_size, k and run_count come from `search_basic_conf` and are shared by all
+// indices. Per index, "name", "algo", "build_param", "file", "search_params" and
+// "search_result_file" are mandatory; "multigpu" (device list, must not be empty)
+// and "refine_ratio" (must be > 1.0) are optional.
+// Throws runtime_error for an empty device list or an invalid refine_ratio.
+void Configuration::parse_index_(const nlohmann::json& index_conf,
+                                 const nlohmann::json& search_basic_conf)
+{
+  const int batch_size = search_basic_conf.at("batch_size");
+  const int k          = search_basic_conf.at("k");
+  const int run_count  = search_basic_conf.at("run_count");
+
+  for (const auto& conf : index_conf) {
+    Index index;
+    index.name        = conf.at("name");
+    index.algo        = conf.at("algo");
+    index.build_param = conf.at("build_param");
+    index.file        = conf.at("file");
+    index.batch_size  = batch_size;
+    index.k           = k;
+    index.run_count   = run_count;
+
+    if (conf.contains("multigpu")) {
+      // iterate by reference: no json copy per device entry
+      for (const auto& device : conf.at("multigpu")) {
+        index.dev_list.push_back(device);
+      }
+      if (index.dev_list.empty()) { throw std::runtime_error("dev_list shouldn't be empty!"); }
+      index.dev_list.shrink_to_fit();
+      // the device list is also recorded as part of the build parameters
+      index.build_param["multigpu"] = conf["multigpu"];
+    }
+
+    if (conf.contains("refine_ratio")) {
+      float refine_ratio = conf.at("refine_ratio");
+      if (refine_ratio <= 1.0f) {
+        throw runtime_error("'" + index.name + "': refine_ratio should > 1.0");
+      }
+      index.refine_ratio = refine_ratio;
+    }
+
+    for (const auto& param : conf.at("search_params")) {
+      index.search_params.push_back(param);
+    }
+    index.search_result_file = conf.at("search_result_file");
+
+    indices_.push_back(index);
+  }
+}
+
+// Returns the candidates selected by the comma-separated list `patterns`.
+// A pattern ending in '*' selects every candidate starting with the text before
+// the '*'; any other pattern selects the candidate equal to it. Empty patterns
+// are ignored.
+unordered_set Configuration::match_(const vector& candidates,
+                                    const string& patterns) const
+{
+  unordered_set selected;
+  for (const auto& pattern : split(patterns, ',')) {
+    if (pattern.empty()) { continue; }
+
+    const bool is_prefix_pattern = pattern.back() == '*';
+    const auto prefix_len        = pattern.size() - 1;
+    for (const auto& name : candidates) {
+      const bool hit = is_prefix_pattern
+                         ? name.compare(0, prefix_len, pattern, 0, prefix_len) == 0
+                         : name == pattern;
+      if (hit) { selected.insert(name); }
+    }
+  }
+
+  return selected;
+}
+
+} // namespace raft::bench::ann
diff --git a/cpp/bench/ann/src/common/conf.h b/cpp/bench/ann/src/common/conf.h
new file mode 100644
index 0000000000..845defe94a
--- /dev/null
+++ b/cpp/bench/ann/src/common/conf.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+#include
+#include
+#include
+#include
+
+#define JSON_DIAGNOSTICS 1
+#include
+
+namespace raft::bench::ann {
+
+// Parsed benchmark configuration: one dataset description plus the list of
+// indices to build or search.
+class Configuration {
+ public:
+  // Description of one index to build / search.
+  struct Index {
+    std::string name;
+    std::string algo;
+    nlohmann::json build_param;
+    // file the index is saved to / loaded from
+    std::string file;
+    // devices given by the optional "multigpu" entry
+    std::vector dev_list;
+
+    // basic search parameters, shared by all indices of one configuration;
+    // zero-initialized so that a default-constructed Index is never indeterminate
+    int batch_size{0};
+    int k{0};
+    int run_count{0};
+    std::vector search_params;
+    // prefix of the search result files
+    std::string search_result_file;
+    // 0 unless the optional "refine_ratio" entry (> 1.0) is given
+    float refine_ratio{0.0f};
+  };
+
+  struct DatasetConf {
+    std::string name;
+    std::string base_file;
+    // use only a subset of base_file,
+    // the range of rows is [subset_first_row, subset_first_row + subset_size)
+    // however, subset_size = 0 means using all rows after subset_first_row
+    // that is, the subset is [subset_first_row, #rows in base_file)
+    size_t subset_first_row{0};
+    size_t subset_size{0};
+    std::string query_file;
+    std::string distance;
+
+    // data type of input dataset, possible values ["float", "int8", "uint8"]
+    std::string dtype;
+  };
+
+  // Parses the JSON configuration read from `conf_stream`.
+  Configuration(std::istream& conf_stream);
+
+  // Returns a copy of the dataset description.
+  DatasetConf get_dataset_conf() const { return dataset_conf_; }
+  // Returns the indices whose name matches the comma-separated `patterns`.
+  std::vector get_indices(const std::string& patterns) const;
+
+ private:
+  void parse_dataset_(const nlohmann::json& conf);
+  void parse_index_(const nlohmann::json& index_conf, const nlohmann::json& search_basic_conf);
+  std::unordered_set match_(const std::vector& candidates,
+                            const std::string& patterns) const;
+
+  DatasetConf dataset_conf_;
+  std::vector indices_;
+};
+
+} // namespace raft::bench::ann
diff --git a/cpp/bench/ann/src/common/dataset.h b/cpp/bench/ann/src/common/dataset.h
new file mode 100644
index 0000000000..1244935c99
--- /dev/null
+++ b/cpp/bench/ann/src/common/dataset.h
@@ -0,0 +1,381 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+namespace raft::bench::ann {
+
+// http://big-ann-benchmarks.com/index.html:
+// binary format that starts with 8 bytes of data consisting of num_points(uint32_t)
+// num_dimensions(uint32) followed by num_pts x num_dimensions x sizeof(type) bytes of
+// data stored one vector after another.
+// Data files will have suffixes .fbin, .u8bin, and .i8bin to represent float32, uint8
+// and int8 type data.
+// As extensions for this benchmark, half and int data files will have suffixes .f16bin
+// and .ibin, respectively.
+template
+class BinFile {  // one binary data file (8-byte header + payload); owns the FILE* and is non-copyable
+ public:
+ BinFile(const std::string& file,
+ const std::string& mode,
+ uint32_t subset_first_row = 0,
+ uint32_t subset_size = 0);
+ ~BinFile() { fclose(fp_); }  // closes the stream opened by the constructor
+ BinFile(const BinFile&) = delete;
+ BinFile& operator=(const BinFile&) = delete;
+
+ void get_shape(size_t* nrows, int* ndims)  // read mode only; reports the shape after subset adjustment
+ {
+ assert(read_mode_);
+ *nrows = nrows_;
+ *ndims = ndims_;
+ }
+
+ void read(T* data) const  // reads nrows_ * ndims_ elements from the current stream position into data
+ {
+ assert(read_mode_);
+ size_t total = static_cast(nrows_) * ndims_;
+ if (fread(data, sizeof(T), total, fp_) != total) {  // a short read is reported as an error
+ throw std::runtime_error("fread() BinFile " + file_ + " failed");
+ }
+ }
+
+ void write(const T* data, uint32_t nrows, uint32_t ndims)  // write mode only; header first, then payload
+ {
+ assert(!read_mode_);
+ if (fwrite(&nrows, sizeof(uint32_t), 1, fp_) != 1) {
+ throw std::runtime_error("fwrite() BinFile " + file_ + " failed");
+ }
+ if (fwrite(&ndims, sizeof(uint32_t), 1, fp_) != 1) {
+ throw std::runtime_error("fwrite() BinFile " + file_ + " failed");
+ }
+
+ size_t total = static_cast(nrows) * ndims;
+ if (fwrite(data, sizeof(T), total, fp_) != total) {
+ throw std::runtime_error("fwrite() BinFile " + file_ + " failed");
+ }
+ }
+
+ void* map() const  // maps the whole file read-only from offset 0, so the mapping includes the header
+ {
+ assert(read_mode_);
+ int fid = fileno(fp_);
+ auto mmap_ptr = mmap(NULL, file_size_, PROT_READ, MAP_PRIVATE, fid, 0);
+ if (mmap_ptr == MAP_FAILED) {
+ throw std::runtime_error("mmap error: Value of errno " + std::to_string(errno) + ", " +
+ std::string(strerror(errno)));
+ }
+ return mmap_ptr;
+ }
+
+ void unmap(void* data) const  // unmaps file_size_ bytes starting at data
+ {
+ if (munmap(data, file_size_) == -1) {
+ throw std::runtime_error("munmap error: " + std::string(strerror(errno)));
+ }
+ }
+
+ private:
+ void check_suffix_();
+
+ std::string file_;
+ FILE* fp_;
+ bool read_mode_;
+ uint32_t nrows_;  // nrows_, ndims_ and file_size_ are only assigned by the constructor in read mode
+ uint32_t ndims_;
+ size_t file_size_;
+};
+
+template
+BinFile::BinFile(const std::string& file,  // opens file in mode "r" or "w"; throws std::runtime_error on any failure
+ const std::string& mode,
+ uint32_t subset_first_row,
+ uint32_t subset_size)
+ : file_(file)
+{
+ check_suffix_();  // reject a file name whose suffix does not match T before touching the file
+
+ if (mode == "r") {
+ read_mode_ = true;
+ } else if (mode == "w") {
+ read_mode_ = false;
+ if (subset_first_row != 0) {  // subsets only make sense when reading
+ throw std::runtime_error("subset_first_row should be zero for write mode");
+ }
+ if (subset_size != 0) { throw std::runtime_error("subset_size should be zero for write mode"); }
+ } else {
+ throw std::runtime_error("BinFile's mode must be either 'r' or 'w': " + file_);
+ }
+
+ fp_ = fopen(file_.c_str(), mode.c_str());
+ if (!fp_) { throw std::runtime_error("open BinFile failed: " + file_); }
+
+ if (read_mode_) {
+ struct stat statbuf;
+ if (stat(file_.c_str(), &statbuf) != 0) { throw std::runtime_error("stat() failed: " + file_); }
+ file_size_ = statbuf.st_size;
+
+ uint32_t header[2];  // header layout: row count, then dimension count
+ if (fread(header, sizeof(uint32_t), 2, fp_) != 2) {
+ throw std::runtime_error("read header of BinFile failed: " + file_);
+ }
+ nrows_ = header[0];
+ ndims_ = header[1];
+
+ size_t expected_file_size =  // header plus payload must account for every byte of the file
+ 2 * sizeof(uint32_t) + static_cast(nrows_) * ndims_ * sizeof(T);
+ if (file_size_ != expected_file_size) {
+ throw std::runtime_error("expected file size of " + file_ + " is " +
+ std::to_string(expected_file_size) + ", however, actual size is " +
+ std::to_string(file_size_));
+ }
+
+ if (subset_first_row >= nrows_) {
+ throw std::runtime_error(file_ + ": subset_first_row (" + std::to_string(subset_first_row) +
+ ") >= nrows (" + std::to_string(nrows_) + ")");
+ }
+ if (subset_size > nrows_ - subset_first_row) {  // subtraction cannot wrap here, unlike the uint32_t sum
+ throw std::runtime_error(file_ + ": subset_first_row (" + std::to_string(subset_first_row) +
+ ") + subset_size (" + std::to_string(subset_size) + ") > nrows (" +
+ std::to_string(nrows_) + ")");
+ }
+
+ if (subset_first_row) {  // skip the rows before the subset; the stream is already past the header
+ static_assert(sizeof(long) == 8, "fseek() don't support 64-bit offset");
+ if (fseek(fp_, sizeof(T) * subset_first_row * ndims_, SEEK_CUR) == -1) {
+ throw std::runtime_error(file_ + ": fseek failed");
+ }
+ nrows_ -= subset_first_row;
+ }
+ if (subset_size) { nrows_ = subset_size; }  // subset_size == 0 keeps all remaining rows
+ }
+}
+
+template
+void BinFile::check_suffix_()  // throws std::runtime_error unless file_'s suffix matches the element type T
+{
+ auto pos = file_.rfind('.');
+ if (pos == std::string::npos) {
+ throw std::runtime_error("name of BinFile doesn't have a suffix: " + file_);
+ }
+ std::string suffix = file_.substr(pos + 1);  // text after the last '.'
+
+ if constexpr (std::is_same_v) {
+ if (suffix != "fbin") {
+ throw std::runtime_error("BinFile should has .fbin suffix: " + file_);
+ }
+ } else if constexpr (std::is_same_v) {
+ if (suffix != "f16bin") {
+ throw std::runtime_error("BinFile should has .f16bin suffix: " + file_);
+ }
+ } else if constexpr (std::is_same_v) {
+ if (suffix != "ibin") {
+ throw std::runtime_error("BinFile should has .ibin suffix: " + file_);
+ }
+ } else if constexpr (std::is_same_v) {
+ if (suffix != "u8bin") {
+ throw std::runtime_error("BinFile should has .u8bin suffix: " + file_);
+ }
+ } else if constexpr (std::is_same_v) {
+ if (suffix != "i8bin") {
+ throw std::runtime_error("BinFile should has .i8bin suffix: " + file_);
+ }
+ } else {  // an unsupported T is reported when this function runs, not at compile time
+ throw std::runtime_error(
+ "T of BinFile should be one of float, half, int, uint8_t, or int8_t");
+ }
+}
+
+template
+class Dataset {  // abstract base: lazily loaded base/query sets with optional device copies; non-copyable
+ public:
+ Dataset(const std::string& name) : name_(name) {}
+ Dataset(const std::string& name, const std::string& distance) : name_(name), distance_(distance)
+ {
+ }
+ Dataset(const Dataset&) = delete;
+ Dataset& operator=(const Dataset&) = delete;
+ virtual ~Dataset();
+
+ std::string name() const { return name_; }
+ std::string distance() const { return distance_; }
+ int dim() const { return dim_; }
+ size_t base_set_size() const { return base_set_size_; }
+ size_t query_set_size() const { return query_set_size_; }
+
+ // Data is loaded lazily on first access, so a set that is never requested is never read
+ // (e.g. the base set is not loaded when only searching).
+ const T* base_set() const
+ {
+ if (!base_set_) { load_base_set_(); }
+ return base_set_;
+ }
+
+ const T* query_set() const
+ {
+ if (!query_set_) { load_query_set_(); }
+ return query_set_;
+ }
+
+ const T* base_set_on_gpu() const;
+ const T* query_set_on_gpu() const;
+ const T* mapped_base_set() const;
+
+ protected:
+ virtual void load_base_set_() const = 0;  // must set base_set_
+ virtual void load_query_set_() const = 0;  // must set query_set_
+ virtual void map_base_set_() const = 0;  // must set mapped_base_set_
+
+ std::string name_;
+ std::string distance_;
+ int dim_;  // dim_ and the two sizes have no default initializer; the derived class must assign them
+ size_t base_set_size_;
+ size_t query_set_size_;
+
+ mutable T* base_set_ = nullptr;  // host buffer, released with delete[] in ~Dataset()
+ mutable T* query_set_ = nullptr;  // host buffer, released with delete[] in ~Dataset()
+ mutable T* d_base_set_ = nullptr;  // released with cudaFree in ~Dataset()
+ mutable T* d_query_set_ = nullptr;  // released with cudaFree in ~Dataset()
+ mutable T* mapped_base_set_ = nullptr;  // not released by ~Dataset(); BinDataset unmaps it
+};
+
+template
+Dataset::~Dataset()  // releases the host and device buffers; mapped_base_set_ is left to the derived class
+{
+ delete[] base_set_;  // delete[] on nullptr is a no-op, so no guard is needed
+ delete[] query_set_;
+ if (d_base_set_) { RAFT_CUDA_TRY_NO_THROW(cudaFree(d_base_set_)); }
+ if (d_query_set_) { RAFT_CUDA_TRY_NO_THROW(cudaFree(d_query_set_)); }
+}
+
+template
+const T* Dataset::base_set_on_gpu() const  // returns the device copy of the base set, creating it on first call
+{
+ if (!d_base_set_) {
+ base_set();  // make sure the host copy is loaded before copying it to the device
+ RAFT_CUDA_TRY(cudaMalloc((void**)&d_base_set_, base_set_size_ * dim_ * sizeof(T)));
+ RAFT_CUDA_TRY(cudaMemcpy(
+ d_base_set_, base_set_, base_set_size_ * dim_ * sizeof(T), cudaMemcpyHostToDevice));
+ }
+ return d_base_set_;
+}
+
+template
+const T* Dataset::query_set_on_gpu() const  // returns the device copy of the query set, creating it on first call
+{
+ if (!d_query_set_) {
+ query_set();  // make sure the host copy is loaded before copying it to the device
+ RAFT_CUDA_TRY(cudaMalloc((void**)&d_query_set_, query_set_size_ * dim_ * sizeof(T)));
+ RAFT_CUDA_TRY(cudaMemcpy(
+ d_query_set_, query_set_, query_set_size_ * dim_ * sizeof(T), cudaMemcpyHostToDevice));
+ }
+ return d_query_set_;
+}
+
+template
+const T* Dataset::mapped_base_set() const  // returns the mapped base set, calling map_base_set_() on first use
+{
+ if (!mapped_base_set_) { map_base_set_(); }
+ return mapped_base_set_;
+}
+
+template
+class BinDataset : public Dataset {  // Dataset backed by two BinFile objects (base set and query set)
+ public:
+ BinDataset(const std::string& name,
+ const std::string& base_file,
+ size_t subset_first_row,
+ size_t subset_size,
+ const std::string& query_file,
+ const std::string& distance);
+ ~BinDataset()
+ {
+ if (this->mapped_base_set_) {  // undo the offset added in map_base_set_() to recover the mapping start
+ base_file_.unmap(reinterpret_cast(this->mapped_base_set_) - subset_offset_);
+ }
+ }
+
+ private:
+ void load_base_set_() const override;
+ void load_query_set_() const override;
+ void map_base_set_() const override;
+
+ using Dataset::dim_;
+ using Dataset::base_set_size_;
+ using Dataset::query_set_size_;
+
+ BinFile base_file_;
+ BinFile query_file_;
+
+ size_t subset_offset_;  // byte offset from the start of the mapping to the first row of the subset
+};
+
+template
+BinDataset::BinDataset(const std::string& name,  // opens both files for reading and records their shapes
+ const std::string& base_file,
+ size_t subset_first_row,
+ size_t subset_size,
+ const std::string& query_file,
+ const std::string& distance)  // throws std::runtime_error if the two files disagree on dimension
+ : Dataset(name, distance),
+ base_file_(base_file, "r", subset_first_row, subset_size),
+ query_file_(query_file, "r")  // subset_offset_ is assigned in the body, once dim_ is known
+{
+ base_file_.get_shape(&base_set_size_, &dim_);
+ subset_offset_ = 2 * sizeof(uint32_t) + subset_first_row * dim_ * sizeof(T);  // header + skipped rows
+ int query_dim;
+ query_file_.get_shape(&query_set_size_, &query_dim);
+ if (query_dim != dim_) {
+ throw std::runtime_error("base set dim (" + std::to_string(dim_) + ") != query set dim (" +
+ std::to_string(query_dim) + ")");
+ }
+}
+
+template
+void BinDataset::load_base_set_() const  // allocates the host buffer and fills it from base_file_
+{
+ this->base_set_ = new T[base_set_size_ * dim_];  // released by delete[] in ~Dataset()
+ base_file_.read(this->base_set_);
+}
+
+template
+void BinDataset::load_query_set_() const  // allocates the host buffer and fills it from query_file_
+{
+ this->query_set_ = new T[query_set_size_ * dim_];  // released by delete[] in ~Dataset()
+ query_file_.read(this->query_set_);
+}
+
+template
+void BinDataset::map_base_set_() const  // maps base_file_ and points mapped_base_set_ at the subset's first row
+{
+ char* original_map_ptr = static_cast(base_file_.map());  // start of the mapping, header included
+ this->mapped_base_set_ = reinterpret_cast(original_map_ptr + subset_offset_);
+}
+
+} // namespace raft::bench::ann
diff --git a/cpp/bench/ann/src/common/util.cpp b/cpp/bench/ann/src/common/util.cpp
new file mode 100644
index 0000000000..17636f76d7
--- /dev/null
+++ b/cpp/bench/ann/src/common/util.cpp
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "util.h"
+
+#include
+#include
+
+#include
+#include
+
+namespace raft::bench::ann {
+
+std::vector split(const std::string& s, char delimiter)  // splits s on delimiter, dropping empty tokens
+{
+ std::vector tokens;
+ std::string token;
+ std::istringstream iss(s);
+ while (getline(iss, token, delimiter)) {
+ if (!token.empty()) { tokens.push_back(token); }  // consecutive or leading delimiters yield no token
+ }
+ return tokens;
+}
+
+bool file_exists(const std::string& filename)  // true only if stat() succeeds and the path is a regular file
+{
+ struct stat statbuf;
+ if (stat(filename.c_str(), &statbuf) != 0) { return false; }
+ return S_ISREG(statbuf.st_mode);
+}
+
+bool dir_exists(const std::string& dir)  // true only if stat() succeeds and the path is a directory
+{
+ struct stat statbuf;
+ if (stat(dir.c_str(), &statbuf) != 0) { return false; }
+ return S_ISDIR(statbuf.st_mode);
+}
+
+bool create_dir(const std::string& dir)  // creates dir and any missing parents; false if a mkdir() fails
+{
+ const auto path = split(dir, '/');
+
+ std::string cwd;
+ if (!dir.empty() && dir[0] == '/') { cwd += '/'; }  // split() drops the leading '/', so restore it
+
+ for (const auto& p : path) {
+ cwd += p + "/";
+ if (!dir_exists(cwd)) {
+ int ret = mkdir(cwd.c_str(), S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
+ if (ret != 0) { return false; }  // NOTE(review): also fails if cwd was created concurrently after the check
+ }
+ }
+ return true;
+}
+
+} // namespace raft::bench::ann
diff --git a/cpp/bench/ann/src/common/util.h b/cpp/bench/ann/src/common/util.h
new file mode 100644
index 0000000000..290bf4cea9
--- /dev/null
+++ b/cpp/bench/ann/src/common/util.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace raft::bench::ann {
+
+class Timer {  // stopwatch based on std::chrono::steady_clock
+ public:
+ Timer() { reset(); }  // starts timing at construction
+ void reset() { start_time_ = std::chrono::steady_clock::now(); }
+ float elapsed_ms()  // time since construction or the last reset(); does not reset the timer
+ {
+ auto end_time = std::chrono::steady_clock::now();
+ auto dur =  // NOTE(review): duration_cast target is not visible here; presumably float milliseconds - confirm
+ std::chrono::duration_cast>(end_time - start_time_);
+ return dur.count();
+ }
+
+ private:
+ std::chrono::steady_clock::time_point start_time_;
+};
+
+std::vector split(const std::string& s, char delimiter);
+
+bool file_exists(const std::string& filename);
+bool dir_exists(const std::string& dir);
+bool create_dir(const std::string& dir);
+
+template
+void log_(const char* level, Ts... vs)  // prints "<local timestamp> [<level>] <formatted message>" to stdout
+{
+ char buf[20];  // 19 characters of "YYYY-MM-DD HH:MM:SS" plus the terminating NUL
+ std::time_t now = std::time(nullptr);
+ std::strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", std::localtime(&now));
+ printf("%s [%s] ", buf, level);
+ printf(vs...);  // the first element of vs is used as the printf format string
+ printf("\n");
+ fflush(stdout);  // flush after every message
+}
+
+template
+void log_info(Ts... vs)  // forwards to log_ with level "info"
+{
+ log_("info", vs...);
+}
+
+template
+void log_warn(Ts... vs)  // forwards to log_ with level "warn"
+{
+ log_("warn", vs...);
+}
+
+template
+void log_error(Ts... vs)  // forwards to log_ with level "error"
+{
+ log_("error", vs...);
+}
+
+} // namespace raft::bench::ann
diff --git a/cpp/bench/ann/src/faiss/faiss_benchmark.cu b/cpp/bench/ann/src/faiss/faiss_benchmark.cu
new file mode 100644
index 0000000000..294da9a14f
--- /dev/null
+++ b/cpp/bench/ann/src/faiss/faiss_benchmark.cu
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "../common/ann_types.hpp"
+#undef WARP_SIZE
+#include "faiss_wrapper.h"
+#define JSON_DIAGNOSTICS 1
+#include
+
+namespace raft::bench::ann {
+
+template
+void parse_build_param(const nlohmann::json& conf,  // fills FaissGpuIVFFlat build parameters from conf
+ typename raft::bench::ann::FaissGpuIVFFlat::BuildParam& param)
+{
+ param.nlist = conf.at("nlist");  // required key: at() throws if it is absent
+}
+
+template
+void parse_build_param(const nlohmann::json& conf,  // fills FaissGpuIVFPQ build parameters from conf
+ typename raft::bench::ann::FaissGpuIVFPQ::BuildParam& param)
+{
+ param.nlist = conf.at("nlist");  // required key: at() throws if it is absent
+ param.M = conf.at("M");  // required key
+ if (conf.contains("usePrecomputed")) {  // optional key, false when absent
+ param.usePrecomputed = conf.at("usePrecomputed");
+ } else {
+ param.usePrecomputed = false;
+ }
+ if (conf.contains("useFloat16")) {  // optional key, false when absent
+ param.useFloat16 = conf.at("useFloat16");
+ } else {
+ param.useFloat16 = false;
+ }
+}
+
+template
+void parse_build_param(const nlohmann::json& conf,  // fills FaissGpuIVFSQ build parameters from conf
+ typename raft::bench::ann::FaissGpuIVFSQ::BuildParam& param)
+{
+ param.nlist = conf.at("nlist");  // required key: at() throws if it is absent
+ param.quantizer_type = conf.at("quantizer_type");  // required key
+}
+
+template
+void parse_search_param(const nlohmann::json& conf,  // fills FaissGpu search parameters from conf
+ typename raft::bench::ann::FaissGpu::SearchParam& param)
+{
+ param.nprobe = conf.at("nprobe");  // required key: at() throws if it is absent
+}
+
+template class Algo>
+std::unique_ptr> make_algo(raft::bench::ann::Metric metric,  // builds an Algo from JSON build parameters
+ int dim,
+ const nlohmann::json& conf)
+{
+ typename Algo::BuildParam param;
+ parse_build_param(conf, param);  // overload is selected by the BuildParam type
+ return std::make_unique>(metric, dim, param);
+}
+
+template class Algo>
+std::unique_ptr> make_algo(raft::bench::ann::Metric metric,  // overload that also accepts a device list
+ int dim,
+ const nlohmann::json& conf,
+ const std::vector& dev_list)
+{
+ typename Algo::BuildParam param;
+ parse_build_param(conf, param);
+
+ (void)dev_list;  // dev_list is accepted but not used by this overload
+ return std::make_unique>(metric, dim, param);
+}
+
+template
+std::unique_ptr> create_algo(const std::string& algo,  // maps an algo name to a FAISS GPU wrapper instance
+ const std::string& distance,
+ int dim,
+ float refine_ratio,
+ const nlohmann::json& conf,
+ const std::vector& dev_list)
+{
+ // stop compiler warning; not all algorithms support multi-GPU so it may not be used
+ (void)dev_list;
+
+ raft::bench::ann::Metric metric = parse_metric(distance);
+ std::unique_ptr> ann;  // stays null unless one of the names below matches
+
+ if constexpr (std::is_same_v) {
+ if (algo == "faiss_gpu_ivf_flat") {
+ ann = make_algo(metric, dim, conf, dev_list);
+ } else if (algo == "faiss_gpu_ivf_pq") {
+ ann = make_algo(metric, dim, conf);
+ } else if (algo == "faiss_gpu_ivf_sq") {
+ ann = make_algo(metric, dim, conf);
+ } else if (algo == "faiss_gpu_flat") {
+ ann = std::make_unique>(metric, dim);  // no build parameters are read for this algo
+ }
+ }
+
+ if constexpr (std::is_same_v) {}  // intentionally empty: no algorithm is registered in this branch
+
+ if (!ann) { throw std::runtime_error("invalid algo: '" + algo + "'"); }
+
+ if (refine_ratio > 1.0) {}  // empty body: refine_ratio currently has no effect here
+ return ann;
+}
+
+template
+std::unique_ptr::AnnSearchParam> create_search_param(  // builds the search-parameter object for algo
+ const std::string& algo, const nlohmann::json& conf)
+{
+ if (algo == "faiss_gpu_ivf_flat" || algo == "faiss_gpu_ivf_pq" || algo == "faiss_gpu_ivf_sq") {
+ auto param = std::make_unique::SearchParam>();
+ parse_search_param(conf, *param);  // the three IVF variants share one SearchParam type
+ return param;
+ } else if (algo == "faiss_gpu_flat") {
+ auto param = std::make_unique::AnnSearchParam>();  // conf is not read for this algo
+ return param;
+ }
+ // any other algo name is rejected
+ throw std::runtime_error("invalid algo: '" + algo + "'");
+}
+
+} // namespace raft::bench::ann
+
+#include "../common/benchmark.hpp"
+
+int main(int argc, char** argv) { return raft::bench::ann::run_main(argc, argv); }  // forwards to run_main; presumably declared in ../common/benchmark.hpp - confirm
\ No newline at end of file
diff --git a/cpp/bench/ann/src/faiss/faiss_wrapper.h b/cpp/bench/ann/src/faiss/faiss_wrapper.h
new file mode 100644
index 0000000000..8cfc26ea5b
--- /dev/null
+++ b/cpp/bench/ann/src/faiss/faiss_wrapper.h
@@ -0,0 +1,317 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef FAISS_WRAPPER_H_
+#define FAISS_WRAPPER_H_
+
+#include
+#include
+#include
+#include
+#include
+#include