From 2a895740e9cc7d89d775e7ea1757412df806ac87 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Tue, 5 Sep 2023 11:42:26 -0500 Subject: [PATCH] Add GPU and CPU packages for ANN benchmarks (#1773) Builds on top of #1769 - [x] Removes `libraft-ann-bench` C++ based package - [x] Creates `raft-ann-bench` packages that include C++ tests as well as Python scripts - [x] `raft-ann-bench` package includes all tests for CPU and GPU - [x] `raft-ann-bench-cpu` package that does not depend on CUDA or RAFT GPU code - [x] Update docs - [x] Test artifacts and scripts in CI - [x] Minor code cleaning Some changes include: - Use `RAPIDS_DATASET_ROOT_DIR` env variable to indicate location of datasets (optional), consistent with other repos: https://docs.rapids.ai/maintainers/datasets/ - CPU and GPU packages are built in the existing GPU build GHA. Only the CUDA 12 jobs build the CPU packages. - Small change for invocation of scripts, for example: `python bench/ann/run.py --dataset deep-image-96-inner` is now `python -m raft-ann-bench.run --dataset deep-image-96-inner`, but these are still scripts meant to be invoked from the command line. Future improvements: - Remove use of popen python scripts from python scripts. - Improve printing and logging - Allow functions of package to be called from python scripts. Closes #1744 Authors: - Dante Gama Dessavre (https://github.com/dantegd) - Artem M. Chirkin (https://github.com/achirkin) - Divye Gala (https://github.com/divyegala) - Corey J. Nolet (https://github.com/cjnolet) Approvers: - Corey J. Nolet (https://github.com/cjnolet) - Divye Gala (https://github.com/divyegala) - AJ Schmidt (https://github.com/ajschmidt8) - Artem M. 
Chirkin (https://github.com/achirkin) URL: https://github.com/rapidsai/raft/pull/1773 --- build.sh | 19 +- ci/build_python.sh | 20 +- .../bench_ann_cuda-118_arch-x86_64.yaml | 3 - conda/recipes/libraft/build_libraft.sh | 2 +- .../recipes/libraft/build_libraft_headers.sh | 2 +- .../recipes/libraft/build_libraft_template.sh | 2 +- conda/recipes/libraft/build_libraft_tests.sh | 2 +- conda/recipes/libraft/meta.yaml | 59 ---- .../build.sh} | 2 +- .../conda_build_config.yaml | 20 ++ conda/recipes/raft-ann-bench-cpu/meta.yaml | 64 ++++ conda/recipes/raft-ann-bench/build.sh | 5 + .../raft-ann-bench/conda_build_config.yaml | 73 +++++ conda/recipes/raft-ann-bench/meta.yaml | 97 ++++++ cpp/CMakeLists.txt | 77 +++-- cpp/bench/ann/CMakeLists.txt | 41 ++- cpp/bench/ann/src/common/ann_types.hpp | 2 - cpp/bench/ann/src/common/cuda_stub.hpp | 10 +- cpp/bench/ann/src/common/dataset.hpp | 8 +- cpp/bench/ann/src/common/util.hpp | 16 +- .../ann/src/hnswlib/hnswlib_benchmark.cpp | 1 - dependencies.yaml | 21 +- docs/source/raft_ann_benchmarks.md | 74 ++--- python/raft-ann-bench/LICENSE | 1 + python/raft-ann-bench/pyproject.toml | 60 ++++ .../src/raft-ann-bench/__init__.py | 0 .../raft-ann-bench/data_export/__main__.py | 63 ++-- .../raft-ann-bench/get_dataset/__main__.py | 58 ++-- .../get_dataset}/fbin_to_f16bin.py | 29 +- .../get_dataset}/hdf5_to_fbin.py | 27 +- .../src/raft-ann-bench/plot/__main__.py | 290 +++++++++++++----- .../src/raft-ann-bench/run/__main__.py | 225 +++++++++----- .../src/raft-ann-bench/run}/algos.yaml | 20 +- .../raft-ann-bench/run}/conf/bigann-100M.json | 0 .../raft-ann-bench/run}/conf/deep-100M.json | 0 .../src/raft-ann-bench/run}/conf/deep-1B.json | 0 .../run/conf/deep-image-96-inner.json | 154 +++++----- .../conf/fashion-mnist-784-euclidean.json | 4 +- .../run}/conf/gist-960-euclidean.json | 4 +- .../run}/conf/glove-100-angular.json | 4 +- .../run}/conf/glove-100-inner.json | 0 .../run}/conf/glove-50-angular.json | 4 +- .../run}/conf/lastfm-65-angular.json | 4 
+- .../run}/conf/mnist-784-euclidean.json | 4 +- .../run}/conf/nytimes-256-angular.json | 4 +- .../run}/conf/sift-128-euclidean.json | 0 .../split_groundtruth/__main__.py | 21 +- .../split_groundtruth}/split_groundtruth.pl | 0 48 files changed, 1104 insertions(+), 492 deletions(-) rename conda/recipes/{libraft/build_libraft_nn_bench.sh => raft-ann-bench-cpu/build.sh} (54%) create mode 100644 conda/recipes/raft-ann-bench-cpu/conda_build_config.yaml create mode 100644 conda/recipes/raft-ann-bench-cpu/meta.yaml create mode 100644 conda/recipes/raft-ann-bench/build.sh create mode 100644 conda/recipes/raft-ann-bench/conda_build_config.yaml create mode 100644 conda/recipes/raft-ann-bench/meta.yaml create mode 120000 python/raft-ann-bench/LICENSE create mode 100644 python/raft-ann-bench/pyproject.toml create mode 100644 python/raft-ann-bench/src/raft-ann-bench/__init__.py rename bench/ann/data_export.py => python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py (53%) rename bench/ann/get_dataset.py => python/raft-ann-bench/src/raft-ann-bench/get_dataset/__main__.py (61%) rename {cpp/bench/ann/scripts => python/raft-ann-bench/src/raft-ann-bench/get_dataset}/fbin_to_f16bin.py (57%) rename {cpp/bench/ann/scripts => python/raft-ann-bench/src/raft-ann-bench/get_dataset}/hdf5_to_fbin.py (78%) rename bench/ann/plot.py => python/raft-ann-bench/src/raft-ann-bench/plot/__main__.py (60%) rename bench/ann/run.py => python/raft-ann-bench/src/raft-ann-bench/run/__main__.py (52%) rename {bench/ann => python/raft-ann-bench/src/raft-ann-bench/run}/algos.yaml (70%) rename {bench/ann => python/raft-ann-bench/src/raft-ann-bench/run}/conf/bigann-100M.json (100%) rename {bench/ann => python/raft-ann-bench/src/raft-ann-bench/run}/conf/deep-100M.json (100%) rename {bench/ann => python/raft-ann-bench/src/raft-ann-bench/run}/conf/deep-1B.json (100%) rename bench/ann/conf/deep-image-96-angular.json => python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-image-96-inner.json (77%) rename 
{bench/ann => python/raft-ann-bench/src/raft-ann-bench/run}/conf/fashion-mnist-784-euclidean.json (99%) rename {bench/ann => python/raft-ann-bench/src/raft-ann-bench/run}/conf/gist-960-euclidean.json (99%) rename {bench/ann => python/raft-ann-bench/src/raft-ann-bench/run}/conf/glove-100-angular.json (99%) rename {bench/ann => python/raft-ann-bench/src/raft-ann-bench/run}/conf/glove-100-inner.json (100%) rename {bench/ann => python/raft-ann-bench/src/raft-ann-bench/run}/conf/glove-50-angular.json (99%) rename {bench/ann => python/raft-ann-bench/src/raft-ann-bench/run}/conf/lastfm-65-angular.json (99%) rename {bench/ann => python/raft-ann-bench/src/raft-ann-bench/run}/conf/mnist-784-euclidean.json (99%) rename {bench/ann => python/raft-ann-bench/src/raft-ann-bench/run}/conf/nytimes-256-angular.json (99%) rename {bench/ann => python/raft-ann-bench/src/raft-ann-bench/run}/conf/sift-128-euclidean.json (100%) rename bench/ann/split_groundtruth.py => python/raft-ann-bench/src/raft-ann-bench/split_groundtruth/__main__.py (63%) rename {cpp/bench/ann/scripts => python/raft-ann-bench/src/raft-ann-bench/split_groundtruth}/split_groundtruth.pl (100%) diff --git a/build.sh b/build.sh index 6d5ebb3d97..071820ba93 100755 --- a/build.sh +++ b/build.sh @@ -18,7 +18,7 @@ ARGS=$* # scripts, and that this script resides in the repo dir! REPODIR=$(cd $(dirname $0); pwd) -VALIDARGS="clean libraft pylibraft raft-dask docs tests template bench-prims bench-ann clean --uninstall -v -g -n --compile-lib --compile-static-lib --allgpuarch --no-nvtx --show_depr_warn --incl-cache-stats --time -h" +VALIDARGS="clean libraft pylibraft raft-dask docs tests template bench-prims bench-ann clean --uninstall -v -g -n --compile-lib --compile-static-lib --allgpuarch --no-nvtx --cpu-only --show_depr_warn --incl-cache-stats --time -h" HELP="$0 [ ...] [ ...] 
[--cmake-args=\"\"] [--cache-tool=] [--limit-tests=] [--limit-bench-prims=] [--limit-bench-ann=] [--build-metrics=] where is: clean - remove all existing build artifacts and configuration (start over) @@ -39,6 +39,7 @@ HELP="$0 [ ...] [ ...] [--cmake-args=\"\"] [--cache-tool==1.7.1 -- matplotlib - nccl>=2.9.9 - ninja - nlohmann_json>=3.11.2 -- pandas -- pyyaml - scikit-build>=0.13.1 - sysroot_linux-64==2.17 name: bench_ann_cuda-118_arch-x86_64 diff --git a/conda/recipes/libraft/build_libraft.sh b/conda/recipes/libraft/build_libraft.sh index 7d4173e8bb..71e1533893 100644 --- a/conda/recipes/libraft/build_libraft.sh +++ b/conda/recipes/libraft/build_libraft.sh @@ -1,4 +1,4 @@ #!/usr/bin/env bash # Copyright (c) 2022-2023, NVIDIA CORPORATION. -./build.sh libraft --allgpuarch --compile-lib --build-metrics=compile_lib --incl-cache-stats --no-nvtx +./build.sh libraft -v --allgpuarch --compile-lib --build-metrics=compile_lib --incl-cache-stats --no-nvtx diff --git a/conda/recipes/libraft/build_libraft_headers.sh b/conda/recipes/libraft/build_libraft_headers.sh index cc3b840e43..330ac92ff3 100644 --- a/conda/recipes/libraft/build_libraft_headers.sh +++ b/conda/recipes/libraft/build_libraft_headers.sh @@ -1,4 +1,4 @@ #!/usr/bin/env bash # Copyright (c) 2022-2023, NVIDIA CORPORATION. -./build.sh libraft --allgpuarch --no-nvtx +./build.sh libraft -v --allgpuarch --no-nvtx diff --git a/conda/recipes/libraft/build_libraft_template.sh b/conda/recipes/libraft/build_libraft_template.sh index bd7719af76..974b0a5b58 100644 --- a/conda/recipes/libraft/build_libraft_template.sh +++ b/conda/recipes/libraft/build_libraft_template.sh @@ -2,4 +2,4 @@ # Copyright (c) 2022-2023, NVIDIA CORPORATION. 
# Just building template so we verify it uses libraft.so and fail if it doesn't build -./build.sh template +./build.sh template -v diff --git a/conda/recipes/libraft/build_libraft_tests.sh b/conda/recipes/libraft/build_libraft_tests.sh index 05a2b59eb0..08f0d33485 100644 --- a/conda/recipes/libraft/build_libraft_tests.sh +++ b/conda/recipes/libraft/build_libraft_tests.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash # Copyright (c) 2022-2023, NVIDIA CORPORATION. -./build.sh tests bench-prims --allgpuarch --no-nvtx --build-metrics=tests_bench_prims --incl-cache-stats +./build.sh tests bench-prims -v --allgpuarch --no-nvtx --build-metrics=tests_bench_prims --incl-cache-stats cmake --install cpp/build --component testing diff --git a/conda/recipes/libraft/meta.yaml b/conda/recipes/libraft/meta.yaml index af542c566b..f4d133d714 100644 --- a/conda/recipes/libraft/meta.yaml +++ b/conda/recipes/libraft/meta.yaml @@ -320,62 +320,3 @@ outputs: home: https://rapids.ai/ license: Apache-2.0 summary: libraft template - - name: libraft-ann-bench - version: {{ version }} - script: build_libraft_nn_bench.sh - build: - script_env: *script_env - number: {{ GIT_DESCRIBE_NUMBER }} - string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} - ignore_run_exports_from: - {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} - {% endif %} - requirements: - build: - - {{ compiler('c') }} - - {{ compiler('cxx') }} - {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} ={{ cuda_version }} - {% else %} - - {{ compiler('cuda') }} - {% endif %} - - cuda-version ={{ cuda_version }} - - cmake {{ cmake_version }} - - ninja - - sysroot_{{ target_platform }} {{ sysroot_version }} - host: - - {{ pin_subpackage('libraft', exact=True) }} - - cuda-version ={{ cuda_version }} - {% if cuda_major == "11" %} - - cuda-profiler-api {{ cuda11_cuda_profiler_api_run_version }} - - libcublas {{ cuda11_libcublas_host_version }} - - libcublas-dev {{ cuda11_libcublas_host_version 
}} - {% else %} - - cuda-profiler-api - - libcublas-dev - {% endif %} - - glog {{ glog_version }} - - nlohmann_json {{ nlohmann_json_version }} - # Temporarily ignore faiss benchmarks on CUDA 12 because packages do not exist yet - {% if cuda_major == "11" %} - - faiss-proc=*=cuda - - libfaiss {{ faiss_version }} - {% endif %} - run: - - {{ pin_subpackage('libraft', exact=True) }} - - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} - {% if cuda_major == "11" %} - - cudatoolkit - {% endif %} - - glog {{ glog_version }} - # Temporarily ignore faiss benchmarks on CUDA 12 because packages do not exist yet - {% if cuda_major == "11" %} - - faiss-proc=*=cuda - - libfaiss {{ faiss_version }} - {% endif %} - - h5py {{ h5py_version }} - about: - home: https://rapids.ai/ - license: Apache-2.0 - summary: libraft ann bench diff --git a/conda/recipes/libraft/build_libraft_nn_bench.sh b/conda/recipes/raft-ann-bench-cpu/build.sh similarity index 54% rename from conda/recipes/libraft/build_libraft_nn_bench.sh rename to conda/recipes/raft-ann-bench-cpu/build.sh index 00078792a1..4462d5124b 100644 --- a/conda/recipes/libraft/build_libraft_nn_bench.sh +++ b/conda/recipes/raft-ann-bench-cpu/build.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash # Copyright (c) 2023, NVIDIA CORPORATION. 
-./build.sh bench-ann --allgpuarch --no-nvtx --build-metrics=bench_ann --incl-cache-stats +./build.sh bench-ann --cpu-only --no-nvtx --build-metrics=bench_ann_cpu --incl-cache-stats cmake --install cpp/build --component ann_bench diff --git a/conda/recipes/raft-ann-bench-cpu/conda_build_config.yaml b/conda/recipes/raft-ann-bench-cpu/conda_build_config.yaml new file mode 100644 index 0000000000..0bd424f85b --- /dev/null +++ b/conda/recipes/raft-ann-bench-cpu/conda_build_config.yaml @@ -0,0 +1,20 @@ +c_compiler_version: + - 11 + +cxx_compiler_version: + - 11 + +sysroot_version: + - "2.17" + +cmake_version: + - ">=3.26.4" + +glog_version: + - ">=0.6.0" + +h5py_version: + - ">=3.8.0" + +nlohmann_json_version: + - ">=3.11.2" diff --git a/conda/recipes/raft-ann-bench-cpu/meta.yaml b/conda/recipes/raft-ann-bench-cpu/meta.yaml new file mode 100644 index 0000000000..355ea640ff --- /dev/null +++ b/conda/recipes/raft-ann-bench-cpu/meta.yaml @@ -0,0 +1,64 @@ +# Copyright (c) 2022-2023, NVIDIA CORPORATION. + +# Usage: +# conda build . -c conda-forge -c nvidia -c rapidsai +{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %} +{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} +{% set py_version = environ['CONDA_PY'] %} +{% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %} +{% set date_string = environ['RAPIDS_DATE_STRING'] %} + +package: + name: raft-ann-bench-cpu + version: {{ version }} + script: build.sh + +source: + git_url: ../../.. 
+ +build: + script_env: + - AWS_ACCESS_KEY_ID + - AWS_SECRET_ACCESS_KEY + - AWS_SESSION_TOKEN + - CMAKE_C_COMPILER_LAUNCHER + - CMAKE_CUDA_COMPILER_LAUNCHER + - CMAKE_CXX_COMPILER_LAUNCHER + - CMAKE_GENERATOR + - PARALLEL_LEVEL + - RAPIDS_ARTIFACTS_DIR + - SCCACHE_BUCKET + - SCCACHE_IDLE_TIMEOUT + - SCCACHE_REGION + - SCCACHE_S3_KEY_PREFIX=libraft-aarch64 # [aarch64] + - SCCACHE_S3_KEY_PREFIX=libraft-linux64 # [linux64] + - SCCACHE_S3_USE_SSL + number: {{ GIT_DESCRIBE_NUMBER }} + string: py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} + +requirements: + build: + - {{ compiler('c') }} + - {{ compiler('cxx') }} + - cmake {{ cmake_version }} + - ninja + - sysroot_{{ target_platform }} {{ sysroot_version }} + + host: + - glog {{ glog_version }} + - matplotlib + - nlohmann_json {{ nlohmann_json_version }} + - python + - pyyaml + + run: + - glog {{ glog_version }} + - h5py {{ h5py_version }} + - matplotlib + - python + - pyyaml + +about: + home: https://rapids.ai/ + license: Apache-2.0 + summary: libraft ann bench diff --git a/conda/recipes/raft-ann-bench/build.sh b/conda/recipes/raft-ann-bench/build.sh new file mode 100644 index 0000000000..9c411774b6 --- /dev/null +++ b/conda/recipes/raft-ann-bench/build.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash +# Copyright (c) 2023, NVIDIA CORPORATION. 
+ +./build.sh bench-ann -v --allgpuarch --no-nvtx --build-metrics=bench_ann --incl-cache-stats +cmake --install cpp/build --component ann_bench diff --git a/conda/recipes/raft-ann-bench/conda_build_config.yaml b/conda/recipes/raft-ann-bench/conda_build_config.yaml new file mode 100644 index 0000000000..d156f2609b --- /dev/null +++ b/conda/recipes/raft-ann-bench/conda_build_config.yaml @@ -0,0 +1,73 @@ +c_compiler_version: + - 11 + +cxx_compiler_version: + - 11 + +cuda_compiler: + - cuda-nvcc + +cuda11_compiler: + - nvcc + +sysroot_version: + - "2.17" + +cmake_version: + - ">=3.26.4" + +nccl_version: + - ">=2.9.9" + +gtest_version: + - ">=1.13.0" + +glog_version: + - ">=0.6.0" + +faiss_version: + - ">=1.7.1" + +h5py_version: + - ">=3.8.0" + +nlohmann_json_version: + - ">=3.11.2" + +# The CTK libraries below are missing from the conda-forge::cudatoolkit package +# for CUDA 11. The "*_host_*" version specifiers correspond to `11.8` packages +# and the "*_run_*" version specifiers correspond to `11.x` packages. + +cuda11_libcublas_host_version: + - "=11.11.3.6" + +cuda11_libcublas_run_version: + - ">=11.5.2.43,<12.0.0" + +cuda11_libcurand_host_version: + - "=10.3.0.86" + +cuda11_libcurand_run_version: + - ">=10.2.5.43,<10.3.1" + +cuda11_libcusolver_host_version: + - "=11.4.1.48" + +cuda11_libcusolver_run_version: + - ">=11.2.0.43,<11.4.2" + +cuda11_libcusparse_host_version: + - "=11.7.5.86" + +cuda11_libcusparse_run_version: + - ">=11.6.0.43,<12.0.0" + +# `cuda-profiler-api` only has `11.8.0` and `12.0.0` packages for all +# architectures. The "*_host_*" version specifiers correspond to `11.8` packages and the +# "*_run_*" version specifiers correspond to `11.x` packages. 
+ +cuda11_cuda_profiler_api_host_version: + - "=11.8.86" + +cuda11_cuda_profiler_api_run_version: + - ">=11.4.240,<12" diff --git a/conda/recipes/raft-ann-bench/meta.yaml b/conda/recipes/raft-ann-bench/meta.yaml new file mode 100644 index 0000000000..882ff6cc49 --- /dev/null +++ b/conda/recipes/raft-ann-bench/meta.yaml @@ -0,0 +1,97 @@ +# Copyright (c) 2022-2023, NVIDIA CORPORATION. + +# Usage: +# conda build . -c conda-forge -c nvidia -c rapidsai +{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %} +{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} +{% set py_version = environ['CONDA_PY'] %} +{% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %} +{% set cuda_major = cuda_version.split('.')[0] %} +{% set cuda_spec = ">=" + cuda_major ~ ",<" + (cuda_major | int + 1) ~ ".0a0" %} # i.e. >=11,<12.0a0 +{% set date_string = environ['RAPIDS_DATE_STRING'] %} + +package: + name: raft-ann-bench + version: {{ version }} + script: build.sh + +source: + git_url: ../../.. 
+ +build: + script_env: + - AWS_ACCESS_KEY_ID + - AWS_SECRET_ACCESS_KEY + - AWS_SESSION_TOKEN + - CMAKE_C_COMPILER_LAUNCHER + - CMAKE_CUDA_COMPILER_LAUNCHER + - CMAKE_CXX_COMPILER_LAUNCHER + - CMAKE_GENERATOR + - PARALLEL_LEVEL + - RAPIDS_ARTIFACTS_DIR + - SCCACHE_BUCKET + - SCCACHE_IDLE_TIMEOUT + - SCCACHE_REGION + - SCCACHE_S3_KEY_PREFIX=libraft-aarch64 # [aarch64] + - SCCACHE_S3_KEY_PREFIX=libraft-linux64 # [linux64] + - SCCACHE_S3_USE_SSL + number: {{ GIT_DESCRIBE_NUMBER }} + string: cuda{{ cuda_major }}_py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} + ignore_run_exports_from: + {% if cuda_major == "11" %} + - {{ compiler('cuda11') }} + {% endif %} + +requirements: + build: + - {{ compiler('c') }} + - {{ compiler('cxx') }} + {% if cuda_major == "11" %} + - {{ compiler('cuda11') }} ={{ cuda_version }} + {% else %} + - {{ compiler('cuda') }} + {% endif %} + - cuda-version ={{ cuda_version }} + - cmake {{ cmake_version }} + - ninja + - sysroot_{{ target_platform }} {{ sysroot_version }} + + host: + - python + - libraft {{ version }} + - cuda-version ={{ cuda_version }} + {% if cuda_major == "11" %} + - cuda-profiler-api {{ cuda11_cuda_profiler_api_run_version }} + - libcublas {{ cuda11_libcublas_host_version }} + - libcublas-dev {{ cuda11_libcublas_host_version }} + {% else %} + - cuda-profiler-api + - libcublas-dev + {% endif %} + - glog {{ glog_version }} + - nlohmann_json {{ nlohmann_json_version }} + # Temporarily ignore faiss benchmarks on CUDA 12 because packages do not exist yet + {% if cuda_major == "11" %} + - faiss-proc=*=cuda + - libfaiss {{ faiss_version }} + {% endif %} + + run: + - python + - libraft {{ version }} + - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} + {% if cuda_major == "11" %} + - cudatoolkit + {% endif %} + - glog {{ glog_version }} + # Temporarily ignore faiss benchmarks on CUDA 12 because packages do not exist yet + {% if cuda_major == "11" %} + - faiss-proc=*=cuda + - 
libfaiss {{ faiss_version }} + {% endif %} + - h5py {{ h5py_version }} + +about: + home: https://rapids.ai/ + license: Apache-2.0 + summary: RAFT ANN GPU and CPU benchmarks diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 309360fd11..d93b19f784 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -17,16 +17,24 @@ cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) include(../fetch_rapids.cmake) include(rapids-cmake) include(rapids-cpm) -include(rapids-cuda) include(rapids-export) include(rapids-find) -rapids_cuda_init_architectures(RAFT) +option(BUILD_CPU_ONLY "Build CPU only components. Applies to RAFT ANN benchmarks currently" OFF) + +# workaround for rapids_cuda_init_architectures not working for arch detection with enable_language(CUDA) +set(lang_list "CXX") + +if(NOT BUILD_CPU_ONLY) + include(rapids-cuda) + rapids_cuda_init_architectures(RAFT) + list(APPEND lang_list "CUDA") +endif() project( RAFT VERSION ${RAFT_VERSION} - LANGUAGES CXX CUDA + LANGUAGES ${lang_list} ) # Write the version header @@ -60,9 +68,11 @@ option(DISABLE_OPENMP "Disable OpenMP" OFF) option(RAFT_NVTX "Enable nvtx markers" OFF) set(RAFT_COMPILE_LIBRARY_DEFAULT OFF) -if(BUILD_TESTS - OR BUILD_PRIMS_BENCH - OR BUILD_ANN_BENCH +if((BUILD_TESTS + OR BUILD_PRIMS_BENCH + OR BUILD_ANN_BENCH + ) + AND NOT BUILD_CPU_ONLY ) set(RAFT_COMPILE_LIBRARY_DEFAULT ON) endif() @@ -70,6 +80,11 @@ option(RAFT_COMPILE_LIBRARY "Enable building raft shared library instantiations" ${RAFT_COMPILE_LIBRARY_DEFAULT} ) +if(BUILD_CPU_ONLY) + set(BUILD_SHARED_LIBS OFF) + set(BUILD_TESTS OFF) +endif() + # Needed because GoogleBenchmark changes the state of FindThreads.cmake, causing subsequent runs to # have different values for the `Threads::Threads` target. 
Setting this flag ensures # `Threads::Threads` is the same value across all builds so that cache hits occur @@ -82,6 +97,8 @@ include(CMakeDependentOption) message(VERBOSE "RAFT: Building optional components: ${raft_FIND_COMPONENTS}") message(VERBOSE "RAFT: Build RAFT unit-tests: ${BUILD_TESTS}") message(VERBOSE "RAFT: Building raft C++ benchmarks: ${BUILD_PRIMS_BENCH}") +message(VERBOSE "RAFT: Building ANN benchmarks: ${BUILD_ANN_BENCH}") +message(VERBOSE "RAFT: Build CPU only components: ${BUILD_CPU_ONLY}") message(VERBOSE "RAFT: Enable detection of conda environment for dependencies: ${DETECT_CONDA_ENV}") message(VERBOSE "RAFT: Disable depreaction warnings " ${DISABLE_DEPRECATION_WARNINGS}) message(VERBOSE "RAFT: Disable OpenMP: ${DISABLE_OPENMP}") @@ -116,15 +133,28 @@ if(DETECT_CONDA_ENV) endif() # ################################################################################################## -# * compiler options --------------------------------------------------------- +# * compiler options ---------------------------------------------------------- set(_ctk_static_suffix "") if(CUDA_STATIC_RUNTIME) set(_ctk_static_suffix "_static") endif() -# CUDA runtime -rapids_cuda_init_runtime(USE_STATIC ${CUDA_STATIC_RUNTIME}) +if(NOT BUILD_CPU_ONLY) + # CUDA runtime + rapids_cuda_init_runtime(USE_STATIC ${CUDA_STATIC_RUNTIME}) + # * find CUDAToolkit package + # * determine GPU architectures + # * enable the CMake CUDA language + # * set other CUDA compilation flags + rapids_find_package( + CUDAToolkit REQUIRED + BUILD_EXPORT_SET raft-exports + INSTALL_EXPORT_SET raft-exports + ) +else() + add_compile_definitions(BUILD_CPU_ONLY) +endif() if(NOT DISABLE_OPENMP) rapids_find_package( @@ -137,15 +167,6 @@ if(NOT DISABLE_OPENMP) endif() endif() -# * find CUDAToolkit package -# * determine GPU architectures -# * enable the CMake CUDA language -# * set other CUDA compilation flags -rapids_find_package( - CUDAToolkit REQUIRED - BUILD_EXPORT_SET raft-exports - 
INSTALL_EXPORT_SET raft-exports -) include(cmake/modules/ConfigureCUDA.cmake) # ################################################################################################## @@ -154,13 +175,15 @@ include(cmake/modules/ConfigureCUDA.cmake) # add third party dependencies using CPM rapids_cpm_init() -# thrust before rmm/cuco so we get the right version of thrust/cub -include(cmake/thirdparty/get_thrust.cmake) -include(cmake/thirdparty/get_rmm.cmake) -include(cmake/thirdparty/get_cutlass.cmake) +if(NOT BUILD_CPU_ONLY) + # thrust before rmm/cuco so we get the right version of thrust/cub + include(cmake/thirdparty/get_thrust.cmake) + include(cmake/thirdparty/get_rmm.cmake) + include(cmake/thirdparty/get_cutlass.cmake) -include(${rapids-cmake-dir}/cpm/cuco.cmake) -rapids_cpm_cuco(BUILD_EXPORT_SET raft-exports INSTALL_EXPORT_SET raft-exports) + include(${rapids-cmake-dir}/cpm/cuco.cmake) + rapids_cpm_cuco(BUILD_EXPORT_SET raft-exports INSTALL_EXPORT_SET raft-exports) +endif() if(BUILD_TESTS) include(cmake/thirdparty/get_gtest.cmake) @@ -180,8 +203,10 @@ target_include_directories( raft INTERFACE "$" "$" ) -# Keep RAFT as lightweight as possible. Only CUDA libs and rmm should be used in global target. -target_link_libraries(raft INTERFACE rmm::rmm cuco::cuco nvidia::cutlass::cutlass raft::Thrust) +if(NOT BUILD_CPU_ONLY) + # Keep RAFT as lightweight as possible. Only CUDA libs and rmm should be used in global target. 
+ target_link_libraries(raft INTERFACE rmm::rmm cuco::cuco nvidia::cutlass::cutlass raft::Thrust) +endif() target_compile_features(raft INTERFACE cxx_std_17 $) target_compile_options( diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index 54cddb678e..8985be328b 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -13,7 +13,7 @@ # ============================================================================= # ################################################################################################## -# * compiler function ----------------------------------------------------------------------------- +# * benchmark options ------------------------------------------------------------------------------ option(RAFT_ANN_BENCH_USE_FAISS_BFKNN "Include faiss' brute-force knn algorithm in benchmark" ON) option(RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT "Include faiss' ivf flat algorithm in benchmark" ON) @@ -27,14 +27,27 @@ option(RAFT_ANN_BENCH_SINGLE_EXE "Make a single executable with benchmark as shared library modules" OFF ) +# ################################################################################################## +# * Process options ---------------------------------------------------------- + find_package(Threads REQUIRED) -# Disable faiss benchmarks on CUDA 12 since faiss is not yet CUDA 12-enabled. -# https://github.com/rapidsai/raft/issues/1627 -if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.0.0) +if(BUILD_CPU_ONLY) set(RAFT_ANN_BENCH_USE_FAISS_BFKNN OFF) set(RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT OFF) set(RAFT_ANN_BENCH_USE_FAISS_IVF_PQ OFF) + set(RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT OFF) + set(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ OFF) + set(RAFT_ANN_BENCH_USE_RAFT_CAGRA OFF) + set(RAFT_ANN_BENCH_USE_GGNN OFF) +else() + # Disable faiss benchmarks on CUDA 12 since faiss is not yet CUDA 12-enabled. 
+ # https://github.com/rapidsai/raft/issues/1627 + if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.0.0) + set(RAFT_ANN_BENCH_USE_FAISS_BFKNN OFF) + set(RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT OFF) + set(RAFT_ANN_BENCH_USE_FAISS_IVF_PQ OFF) + endif() endif() set(RAFT_ANN_BENCH_USE_FAISS OFF) @@ -53,12 +66,13 @@ if(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ set(RAFT_ANN_BENCH_USE_RAFT ON) endif() +# ################################################################################################## +# * Fetch requirements ------------------------------------------------------------- + if(RAFT_ANN_BENCH_USE_HNSWLIB) include(cmake/thirdparty/get_hnswlib.cmake) endif() -option(RAFT_ANN_BENCH_USE_MULTIGPU "Use multi-gpus (where possible) in benchmarks" OFF) - include(cmake/thirdparty/get_nlohmann_json.cmake) if(RAFT_ANN_BENCH_USE_GGNN) @@ -69,11 +83,18 @@ if(RAFT_ANN_BENCH_USE_FAISS) include(cmake/thirdparty/get_faiss.cmake) endif() +# ################################################################################################## +# * Configure tests function------------------------------------------------------------- + function(ConfigureAnnBench) set(oneValueArgs NAME) set(multiValueArgs PATH LINKS CXXFLAGS INCLUDES) + if(NOT BUILD_CPU_ONLY) + set(GPU_BUILD ON) + endif() + cmake_parse_arguments( ConfigureAnnBench "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} ) @@ -95,10 +116,10 @@ function(ConfigureAnnBench) ${BENCH_NAME} PRIVATE raft::raft nlohmann_json::nlohmann_json - $<$:NCCL::NCCL> + $<$:$<$:NCCL::NCCL>> ${ConfigureAnnBench_LINKS} Threads::Threads - ${RAFT_CTK_MATH_DEPENDENCIES} + $<$:${RAFT_CTK_MATH_DEPENDENCIES}> $ $ -static-libgcc @@ -143,10 +164,12 @@ function(ConfigureAnnBench) TARGETS ${BENCH_NAME} COMPONENT ann_bench DESTINATION bin/ann - EXCLUDE_FROM_ALL ) endfunction() +# ################################################################################################## +# * Configure 
tests------------------------------------------------------------- + if(RAFT_ANN_BENCH_USE_HNSWLIB) ConfigureAnnBench( NAME HNSWLIB PATH bench/ann/src/hnswlib/hnswlib_benchmark.cpp INCLUDES diff --git a/cpp/bench/ann/src/common/ann_types.hpp b/cpp/bench/ann/src/common/ann_types.hpp index bdcfd95b2e..33716bd45a 100644 --- a/cpp/bench/ann/src/common/ann_types.hpp +++ b/cpp/bench/ann/src/common/ann_types.hpp @@ -1,5 +1,3 @@ - - /* * Copyright (c) 2023, NVIDIA CORPORATION. * diff --git a/cpp/bench/ann/src/common/cuda_stub.hpp b/cpp/bench/ann/src/common/cuda_stub.hpp index 7c11194842..b2e3130304 100644 --- a/cpp/bench/ann/src/common/cuda_stub.hpp +++ b/cpp/bench/ann/src/common/cuda_stub.hpp @@ -18,11 +18,11 @@ /* The content of this header is governed by two preprocessor definitions: - - CPU_ONLY - whether none of the CUDA functions are used. + - BUILD_CPU_ONLY - whether none of the CUDA functions are used. - ANN_BENCH_LINK_CUDART - dynamically link against this string if defined. ______________________________________________________________________________ -|CPU_ONLY | ANN_BENCH_LINK_CUDART | cudart | cuda_runtime_api.h | +|BUILD_CPU_ONLY | ANN_BENCH_LINK_CUDART | cudart | cuda_runtime_api.h | | | | found | needed | included | |---------|-----------------------|-----------|---------|--------------------| | ON | | false | false | NO | @@ -32,7 +32,7 @@ ______________________________________________________________________________ ------------------------------------------------------------------------------ */ -#ifndef CPU_ONLY +#ifndef BUILD_CPU_ONLY #include #ifdef ANN_BENCH_LINK_CUDART #include @@ -96,7 +96,7 @@ struct cuda_lib_handle { /** Whether this is NOT a cpu-only package. */ [[nodiscard]] constexpr inline auto needed() const -> bool { -#if defined(CPU_ONLY) +#if defined(BUILD_CPU_ONLY) return false; #else return true; @@ -106,7 +106,7 @@ struct cuda_lib_handle { /** CUDA found, either at compile time or at runtime. 
*/ [[nodiscard]] inline auto found() const -> bool { -#if defined(CPU_ONLY) +#if defined(BUILD_CPU_ONLY) return false; #elif defined(ANN_BENCH_LINK_CUDART) return handle != nullptr; diff --git a/cpp/bench/ann/src/common/dataset.hpp b/cpp/bench/ann/src/common/dataset.hpp index 7fa82a632f..ccc5915b3c 100644 --- a/cpp/bench/ann/src/common/dataset.hpp +++ b/cpp/bench/ann/src/common/dataset.hpp @@ -17,7 +17,7 @@ #include "util.hpp" -#ifndef CPU_ONLY +#ifndef BUILD_CPU_ONLY #include #else typedef uint16_t half; @@ -326,7 +326,7 @@ Dataset::~Dataset() delete[] base_set_; delete[] query_set_; delete[] gt_set_; -#ifndef CPU_ONLY +#ifndef BUILD_CPU_ONLY if (d_base_set_) { cudaFree(d_base_set_); } if (d_query_set_) { cudaFree(d_query_set_); } #endif @@ -335,7 +335,7 @@ Dataset::~Dataset() template const T* Dataset::base_set_on_gpu() const { -#ifndef CPU_ONLY +#ifndef BUILD_CPU_ONLY if (!d_base_set_) { base_set(); cudaMalloc((void**)&d_base_set_, base_set_size() * dim() * sizeof(T)); @@ -348,7 +348,7 @@ const T* Dataset::base_set_on_gpu() const template const T* Dataset::query_set_on_gpu() const { -#ifndef CPU_ONLY +#ifndef BUILD_CPU_ONLY if (!d_query_set_) { query_set(); cudaMalloc((void**)&d_query_set_, query_set_size() * dim() * sizeof(T)); diff --git a/cpp/bench/ann/src/common/util.hpp b/cpp/bench/ann/src/common/util.hpp index 49c4de1b11..e9e4a9ad21 100644 --- a/cpp/bench/ann/src/common/util.hpp +++ b/cpp/bench/ann/src/common/util.hpp @@ -47,7 +47,7 @@ struct buf { : memory_type(memory_type), size(size), data(nullptr) { switch (memory_type) { -#ifndef CPU_ONLY +#ifndef BUILD_CPU_ONLY case MemoryType::Device: { cudaMalloc(reinterpret_cast(&data), size * sizeof(T)); cudaMemset(data, 0, size * sizeof(T)); @@ -63,7 +63,7 @@ struct buf { { if (data == nullptr) { return; } switch (memory_type) { -#ifndef CPU_ONLY +#ifndef BUILD_CPU_ONLY case MemoryType::Device: { cudaFree(data); } break; @@ -77,7 +77,7 @@ struct buf { [[nodiscard]] auto move(MemoryType target_memory_type) -> buf 
{ buf r{target_memory_type, size}; -#ifndef CPU_ONLY +#ifndef BUILD_CPU_ONLY if ((memory_type == MemoryType::Device && target_memory_type != MemoryType::Device) || (memory_type != MemoryType::Device && target_memory_type == MemoryType::Device)) { cudaMemcpy(r.data, data, size * sizeof(T), cudaMemcpyDefault); @@ -108,7 +108,7 @@ struct cuda_timer { cuda_lap(cudaStream_t stream, cudaEvent_t start, cudaEvent_t stop, double& total_time) : start_(start), stop_(stop), stream_(stream), total_time_(total_time) { -#ifndef CPU_ONLY +#ifndef BUILD_CPU_ONLY cudaStreamSynchronize(stream_); cudaEventRecord(start_, stream_); #endif @@ -117,7 +117,7 @@ struct cuda_timer { ~cuda_lap() noexcept { -#ifndef CPU_ONLY +#ifndef BUILD_CPU_ONLY cudaEventRecord(stop_, stream_); cudaEventSynchronize(stop_); float milliseconds = 0.0f; @@ -129,7 +129,7 @@ struct cuda_timer { cuda_timer() { -#ifndef CPU_ONLY +#ifndef BUILD_CPU_ONLY cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking); cudaEventCreate(&stop_); cudaEventCreate(&start_); @@ -138,7 +138,7 @@ struct cuda_timer { ~cuda_timer() noexcept { -#ifndef CPU_ONLY +#ifndef BUILD_CPU_ONLY cudaEventDestroy(start_); cudaEventDestroy(stop_); cudaStreamDestroy(stream_); @@ -158,7 +158,7 @@ struct cuda_timer { inline auto cuda_info() { std::vector> props; -#ifndef CPU_ONLY +#ifndef BUILD_CPU_ONLY int dev, driver = 0, runtime = 0; cudaDriverGetVersion(&driver); cudaRuntimeGetVersion(&runtime); diff --git a/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp b/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp index be5b72c5f6..7d96e54989 100644 --- a/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp +++ b/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp @@ -116,7 +116,6 @@ REGISTER_ALGO_INSTANCE(std::int8_t); REGISTER_ALGO_INSTANCE(std::uint8_t); #ifdef ANN_BENCH_BUILD_MAIN -#define CPU_ONLY #include "../common/benchmark.hpp" int main(int argc, char** argv) { return raft::bench::ann::run_main(argc, argv); } #endif diff --git a/dependencies.yaml 
b/dependencies.yaml index 05f5e5e2ce..6f64287f54 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -98,6 +98,20 @@ files: key: test includes: - test_python_common + py_build_raft_ann_bench: + output: pyproject + pyproject_dir: python/raft-ann-bench + extras: + table: build-system + includes: + - build_wheels + py_run_raft_ann_bench: + output: pyproject + pyproject_dir: python/raft-ann-bench + extras: + table: project + includes: + - nn_bench_python channels: - rapidsai - rapidsai-nightly @@ -161,7 +175,7 @@ dependencies: - clang-tools=16.0.1 nn_bench: common: - - output_types: [conda] + - output_types: [conda, pyproject, requirements] packages: - hnswlib=0.7.0 - nlohmann_json>=3.11.2 @@ -170,9 +184,14 @@ dependencies: - libfaiss>=1.7.1 - benchmark>=1.8.2 - faiss-proc=*=cuda + nn_bench_python: + common: + - output_types: [conda] + packages: - matplotlib - pandas - pyyaml + - pandas cudatoolkit: specific: diff --git a/docs/source/raft_ann_benchmarks.md b/docs/source/raft_ann_benchmarks.md index aae4a07100..e0c02bb7eb 100644 --- a/docs/source/raft_ann_benchmarks.md +++ b/docs/source/raft_ann_benchmarks.md @@ -4,24 +4,28 @@ This project provides a benchmark program for various ANN search implementations ## Installing the benchmarks -The easiest way to install these benchmarks is through conda. We suggest using mamba as it generally leads to a faster install time:: +The easiest way to install these benchmarks is through conda. We provide packages for GPU enabled systems, as well for systems without a GPU. 
We suggest using mamba as it generally leads to a faster install time: + ```bash -git clone https://github.com/rapidsai/raft.git && cd raft -export RAFT_HOME=$(pwd) -mamba env create --name raft_ann_benchmarks -f conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +mamba env create --name raft_ann_benchmarks conda activate raft_ann_benchmarks -mamba install -c rapidsai -c conda-forge -c nvidia libraft libraft-ann-bench cudatoolkit=11.8* +# to install GPU package: +mamba install -c rapidsai -c conda-forge -c nvidia raft-ann-bench cuda-version=11.8* + +# to install CPU package for usage in CPU-only systems: +mamba install -c rapidsai -c conda-forge raft-ann-bench-cpu ``` -The channel `rapidsai` can easily be substituted `rapidsai-nightly` if nightly benchmarks are desired. + +The channel `rapidsai` can easily be substituted with `rapidsai-nightly` if nightly benchmarks are desired. The CPU package currently allows running the HNSW benchmarks. Please see the [build instructions](ann_benchmarks_build.md) to build the benchmarks from source. ## Running the benchmarks ### Usage -There are 4 general steps to running the benchmarks and vizualizing the results: +There are 4 general steps to running the benchmarks and visualizing the results: 1. Prepare Dataset 2. Build Index and Search Index 3. Data Export @@ -35,26 +39,24 @@ expected to be defined to run these scripts; this variable holds the directory w ### End-to-end example: Million-scale -The steps below demonstrate how to download, install, and run benchmarks on a subset of 10M vectors from the Yandex Deep-1B dataset. 
+The steps below demonstrate how to download, install, and run benchmarks on a subset of 10M vectors from the Yandex Deep-1B dataset. By default the datasets will be stored and used from the folder indicated by the RAPIDS_DATASET_ROOT_DIR environment variable if defined, otherwise a datasets subfolder from where the script is being called: ```bash -export RAFT_HOME=$(pwd) -# All scripts are present in directory raft/bench/ann -# (1) prepare dataset -python bench/ann/get_dataset.py --dataset deep-image-96-angular --normalize +# (1) prepare dataset. +python -m raft-ann-bench.get_dataset --dataset deep-image-96-angular --normalize # (2) build and search index -python bench/ann/run.py --dataset deep-image-96-inner +python -m raft-ann-bench.run --dataset deep-image-96-inner # (3) export data -python bench/ann/data_export.py --dataset deep-image-96-inner +python -m raft-ann-bench.data_export --dataset deep-image-96-inner # (4) plot results -python bench/ann/plot.py --dataset deep-image-96-inner +python -m raft-ann-bench.plot --dataset deep-image-96-inner ``` -Configuration files already exist for the following list of the million-scale datasets. These all work out-of-the-box with the `--dataset` argument. Other million-scale datasets from `ann-benchmarks.com` will work, but will require a json configuration file to be created in `bench/ann/conf`. +Configuration files already exist for the following list of the million-scale datasets. These all work out-of-the-box with the `--dataset` argument. Other million-scale datasets from `ann-benchmarks.com` will work, but will require a json configuration file to be created in `python/raft-ann-bench/src/raft-ann-bench/run/conf`. 
- `deep-image-96-angular` - `fashion-mnist-784-euclidean` - `glove-50-angular` @@ -65,7 +67,7 @@ Configuration files already exist for the following list of the million-scale da - `sift-128-euclidean` ### End-to-end example: Billion-scale -`bench/ann/get_dataset.py` cannot be used to download the [billion-scale datasets](ann_benchmarks_dataset.md#billion-scale) +`raft-ann-bench.get_dataset` cannot be used to download the [billion-scale datasets](ann_benchmarks_dataset.md#billion-scale) because they are so large. You should instead use our billion-scale datasets guide to download and prepare them. All other python mentioned below work as intended once the billion-scale dataset has been downloaded. @@ -73,27 +75,25 @@ To download Billion-scale datasets, visit [big-ann-benchmarks](http://big-ann-be The steps below demonstrate how to download, install, and run benchmarks on a subset of 100M vectors from the Yandex Deep-1B dataset. Please note that datasets of this scale are recommended for GPUs with larger amounts of memory, such as the A100 or H100. 
```bash -export RAFT_HOME=$(pwd) -# All scripts are present in directory raft/bench/ann -mkdir -p bench/ann/data/deep-1B +mkdir -p datasets/deep-1B # (1) prepare dataset # download manually "Ground Truth" file of "Yandex DEEP" # suppose the file name is deep_new_groundtruth.public.10K.bin -python bench/ann/split_groundtruth.py --groundtruth bench/ann/data/deep-1B/deep_new_groundtruth.public.10K.bin +python -m raft-ann-bench.split_groundtruth --groundtruth datasets/deep-1B/deep_new_groundtruth.public.10K.bin # two files 'groundtruth.neighbors.ibin' and 'groundtruth.distances.fbin' should be produced # (2) build and search index -python bench/ann/run.py --dataset deep-1B +python -m raft-ann-bench.run --dataset deep-1B # (3) export data -python bench/ann/data_export.py --dataset deep-1B +python -m raft-ann-bench.data_export --dataset deep-1B # (4) plot results -python bench/ann/plot.py --dataset deep-1B +python -m raft-ann-bench.plot --dataset deep-1B ``` -The usage of `bench/ann/split-groundtruth.py` is: +The usage of `python -m raft-ann-bench.split_groundtruth` is: ```bash usage: split_groundtruth.py [-h] --groundtruth GROUNDTRUTH @@ -104,7 +104,7 @@ options: ``` ##### Step 1: Prepare Dataset -The script `bench/ann/get_dataset.py` will download and unpack the dataset in directory +The script `raft-ann-bench.get_dataset` will download and unpack the dataset in directory that the user provides. As of now, only million-scale datasets are supported by this script. For more information on [datasets and formats](ann_benchmarks_dataset.md). 
@@ -116,13 +116,13 @@ options: -h, --help show this help message and exit --dataset DATASET dataset to download (default: glove-100-angular) --dataset-path DATASET_PATH - path to download dataset (default: ${RAFT_HOME}/bench/ann/data) + path to download dataset (default: ${RAPIDS_DATASET_ROOT_DIR}) --normalize normalize cosine distance to inner product (default: False) ``` When option `normalize` is provided to the script, any dataset that has cosine distances will be normalized to inner product. So, for example, the dataset `glove-100-angular` -will be written at location `${RAFT_HOME}/bench/ann/data/glove-100-inner/`. +will be written at location `datasets/glove-100-inner/`. #### Step 2: Build and Search Index The script `bench/ann/run.py` will build and search indices for a given dataset and its @@ -133,13 +133,13 @@ An entry in `algos.yaml` looks like: ```yaml raft_ivf_pq: executable: RAFT_IVF_PQ_ANN_BENCH - disabled: false + requires_gpu: true ``` `executable` : specifies the name of the binary that will build/search the index. It is assumed to be available in `raft/cpp/build/`. -`disabled` : denotes whether an algorithm should be excluded from benchmark runs. +`requires_gpu` : denotes whether an algorithm requires GPU to run. 
-The usage of the script `bench/ann/run.py` is: +The usage of the script `raft-ann-bench.run` is: ```bash usage: run.py [-h] [-k COUNT] [-bs BATCH_SIZE] [--configuration CONFIGURATION] [--dataset DATASET] [--dataset-path DATASET_PATH] [--build] [--search] [--algorithms ALGORITHMS] [--indices INDICES] [-f] @@ -154,7 +154,7 @@ options: path to configuration file for a dataset (default: None) --dataset DATASET dataset whose configuration file will be used (default: glove-100-inner) --dataset-path DATASET_PATH - path to dataset folder (default: ${RAFT_HOME}/bench/ann/data) + path to dataset folder (default: ${RAPIDS_DATASET_ROOT_DIR}) --build --search --algorithms ALGORITHMS @@ -166,7 +166,7 @@ options: `configuration` and `dataset` : `configuration` is a path to a configuration file for a given dataset. The configuration file should be name as `.json`. It is optional if the name of the dataset is provided with the `dataset` argument, in which case -a configuration file will be searched for as `${RAFT_HOME}/bench/ann/conf/.json`. +a configuration file will be searched for as `python/raft-ann-bench/src/raft-ann-bench/run/conf/.json`. For every algorithm run by this script, it outputs an index build statistics JSON file in `/result/build/` and an index search statistics JSON file in `/result/search/`. @@ -182,8 +182,8 @@ it is assumed both are `True`. is available in `algos.yaml` and not disabled, as well as having an associated executable. #### Step 3: Data Export -The script `bench/ann/data_export.py` will convert the intermediate JSON outputs produced by `bench/ann/run.py` to more -easily readable CSV files, which are needed to build charts made by `bench/ann/plot.py`. +The script `raft-ann-bench.data_export` will convert the intermediate JSON outputs produced by `raft-ann-bench.run` to more +easily readable CSV files, which are needed to build charts made by `raft-ann-bench.plot`. 
```bash usage: data_export.py [-h] [--dataset DATASET] [--dataset-path DATASET_PATH] @@ -192,7 +192,7 @@ options: -h, --help show this help message and exit --dataset DATASET dataset to download (default: glove-100-inner) --dataset-path DATASET_PATH - path to dataset folder (default: ${RAFT_HOME}/bench/ann/data) + path to dataset folder (default: ${RAPIDS_DATASET_ROOT_DIR}) ``` Build statistics CSV file is stored in `/result/build/` and index search statistics CSV file in `/result/search/`. @@ -210,7 +210,7 @@ options: -h, --help show this help message and exit --dataset DATASET dataset to download (default: glove-100-inner) --dataset-path DATASET_PATH - path to dataset folder (default: ${RAFT_HOME}/bench/ann/data) + path to dataset folder (default: ${RAPIDS_DATASET_ROOT_DIR}) --output-filepath OUTPUT_FILEPATH directory for PNG to be saved (default: os.getcwd()) --algorithms ALGORITHMS diff --git a/python/raft-ann-bench/LICENSE b/python/raft-ann-bench/LICENSE new file mode 120000 index 0000000000..30cff7403d --- /dev/null +++ b/python/raft-ann-bench/LICENSE @@ -0,0 +1 @@ +../../LICENSE \ No newline at end of file diff --git a/python/raft-ann-bench/pyproject.toml b/python/raft-ann-bench/pyproject.toml new file mode 100644 index 0000000000..7decc8858b --- /dev/null +++ b/python/raft-ann-bench/pyproject.toml @@ -0,0 +1,60 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. + +[build-system] +build-backend = "setuptools.build_meta" +requires = [ + "setuptools", + "wheel", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. + +[project] +name = "raft-ann-bench" +version = "23.10.00" +description = "RAFT ANN benchmarks" +authors = [ + { name = "NVIDIA Corporation" }, +] +license = { text = "Apache 2.0" } +requires-python = ">=3.9" +dependencies = [ +] # This list was generated by `rapids-dependency-file-generator`. 
To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. +classifiers = [ + "Intended Audience :: Developers", + "Topic :: Database", + "Topic :: Scientific/Engineering", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", +] +dynamic = ["entry-points"] + +[project.urls] +Homepage = "https://github.com/rapidsai/raft" + +[tool.setuptools.packages.find] +where = ["src"] + +[tool.setuptools.package-data] +"*" = ["*.*"] + +[tool.isort] +line_length = 79 +multi_line_output = 3 +include_trailing_comma = true +force_grid_wrap = 0 +combine_as_imports = true +order_by_type = true +skip = [ + "thirdparty", + ".eggs", + ".git", + ".hg", + ".mypy_cache", + ".tox", + ".venv", + "_build", + "buck-out", + "build", + "dist", +] diff --git a/python/raft-ann-bench/src/raft-ann-bench/__init__.py b/python/raft-ann-bench/src/raft-ann-bench/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/bench/ann/data_export.py b/python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py similarity index 53% rename from bench/ann/data_export.py rename to python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py index 33304bc276..fd9d00f43c 100644 --- a/bench/ann/data_export.py +++ b/python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py @@ -15,9 +15,10 @@ import argparse -import pandas as pd -import os import json +import os + +import pandas as pd def read_file(dataset, dataset_path, method): @@ -27,36 +28,58 @@ def read_file(dataset, dataset_path, method): with open(os.path.join(dir, file), "r") as f: data = json.load(f) df = pd.DataFrame(data["benchmarks"]) - yield (os.path.join(dir, file), file.split('-')[0], df) + yield (os.path.join(dir, file), file.split("-")[0], df) + def convert_json_to_csv_build(dataset, dataset_path): for file, algo_name, df in read_file(dataset, dataset_path, "build"): 
- df['name'] = df['name'].str.split('/').str[0] - write = pd.DataFrame({'algo_name' : [algo_name] * len(df), - 'index_name' : df['name'], - 'time' : df['real_time']}) - write.to_csv(file.replace('.json', '.csv'), index=False) + df["name"] = df["name"].str.split("/").str[0] + write = pd.DataFrame( + { + "algo_name": [algo_name] * len(df), + "index_name": df["name"], + "time": df["real_time"], + } + ) + write.to_csv(file.replace(".json", ".csv"), index=False) def convert_json_to_csv_search(dataset, dataset_path): for file, algo_name, df in read_file(dataset, dataset_path, "search"): - df['name'] = df['name'].str.split('/').str[0] - write = pd.DataFrame({'algo_name' : [algo_name] * len(df), - 'index_name' : df['name'], - 'recall' : df['Recall'], - 'qps' : df['items_per_second']}) - write.to_csv(file.replace('.json', '.csv'), index=False) + df["name"] = df["name"].str.split("/").str[0] + write = pd.DataFrame( + { + "algo_name": [algo_name] * len(df), + "index_name": df["name"], + "recall": df["Recall"], + "qps": df["items_per_second"], + } + ) + write.to_csv(file.replace(".json", ".csv"), index=False) def main(): + + call_path = os.getcwd() + if "RAPIDS_DATASET_ROOT_DIR" in os.environ: + default_dataset_path = os.getenv("RAPIDS_DATASET_ROOT_DIR") + else: + default_dataset_path = os.path.join(call_path, "datasets/") + parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument("--dataset", help="dataset to download", - default="glove-100-inner") - parser.add_argument("--dataset-path", help="path to dataset folder", - default=os.path.join(os.getenv("RAFT_HOME"), - "bench", "ann", "data")) + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument( + "--dataset", help="dataset to download", default="glove-100-inner" + ) + parser.add_argument( + "--dataset-path", + help="path to dataset folder", + default=default_dataset_path, + ) + args = parser.parse_args() + 
convert_json_to_csv_build(args.dataset, args.dataset_path) convert_json_to_csv_search(args.dataset, args.dataset_path) diff --git a/bench/ann/get_dataset.py b/python/raft-ann-bench/src/raft-ann-bench/get_dataset/__main__.py similarity index 61% rename from bench/ann/get_dataset.py rename to python/raft-ann-bench/src/raft-ann-bench/get_dataset/__main__.py index a175384dc3..d2cb8ebe98 100644 --- a/bench/ann/get_dataset.py +++ b/python/raft-ann-bench/src/raft-ann-bench/get_dataset/__main__.py @@ -32,16 +32,15 @@ def download_dataset(url, path): def convert_hdf5_to_fbin(path, normalize): - ann_bench_scripts_dir = os.path.join(os.getenv("RAFT_HOME"), - "cpp/bench/ann/scripts") - ann_bench_scripts_path = os.path.join(ann_bench_scripts_dir, - "hdf5_to_fbin.py") + scripts_path = os.path.dirname(os.path.realpath(__file__)) + ann_bench_scripts_path = os.path.join(scripts_path, "hdf5_to_fbin.py") + print(f"calling script {ann_bench_scripts_path}") if normalize and "angular" in path: - p = subprocess.Popen(["python", ann_bench_scripts_path, "-n", - "%s" % path]) + p = subprocess.Popen( + ["python", ann_bench_scripts_path, "-n", "%s" % path] + ) else: - p = subprocess.Popen(["python", ann_bench_scripts_path, - "%s" % path]) + p = subprocess.Popen(["python", ann_bench_scripts_path, "%s" % path]) p.wait() @@ -53,10 +52,16 @@ def move(name, ann_bench_data_path): new_path = os.path.join(ann_bench_data_path, new_name) if not os.path.exists(new_path): os.mkdir(new_path) - for bin_name in ["base.fbin", "query.fbin", "groundtruth.neighbors.ibin", - "groundtruth.distances.fbin"]: - os.rename(f"{ann_bench_data_path}/{name}.{bin_name}", - f"{new_path}/{bin_name}") + for bin_name in [ + "base.fbin", + "query.fbin", + "groundtruth.neighbors.ibin", + "groundtruth.distances.fbin", + ]: + os.rename( + f"{ann_bench_data_path}/{name}.{bin_name}", + f"{new_path}/{bin_name}", + ) def download(name, normalize, ann_bench_data_path): @@ -74,16 +79,27 @@ def download(name, normalize, 
ann_bench_data_path): def main(): + call_path = os.getcwd() + if "RAPIDS_DATASET_ROOT_DIR" in os.environ: + default_dataset_path = os.getenv("RAPIDS_DATASET_ROOT_DIR") + else: + default_dataset_path = os.path.join(call_path, "datasets/") parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument("--dataset", help="dataset to download", - default="glove-100-angular") - parser.add_argument("--dataset-path", help="path to download dataset", - default=os.path.join(os.getenv("RAFT_HOME"), - "bench", "ann", "data")) - parser.add_argument("--normalize", - help="normalize cosine distance to inner product", - action="store_true") + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument( + "--dataset", help="dataset to download", default="glove-100-angular" + ) + parser.add_argument( + "--dataset-path", + help="path to download dataset", + default=default_dataset_path, + ) + parser.add_argument( + "--normalize", + help="normalize cosine distance to inner product", + action="store_true", + ) args = parser.parse_args() download(args.dataset, args.normalize, args.dataset_path) diff --git a/cpp/bench/ann/scripts/fbin_to_f16bin.py b/python/raft-ann-bench/src/raft-ann-bench/get_dataset/fbin_to_f16bin.py similarity index 57% rename from cpp/bench/ann/scripts/fbin_to_f16bin.py rename to python/raft-ann-bench/src/raft-ann-bench/get_dataset/fbin_to_f16bin.py index 4ea8988d87..ee7410e0cc 100755 --- a/cpp/bench/ann/scripts/fbin_to_f16bin.py +++ b/python/raft-ann-bench/src/raft-ann-bench/get_dataset/fbin_to_f16bin.py @@ -1,20 +1,23 @@ -# ============================================================================= -# Copyright (c) 2020-2023, NVIDIA CORPORATION. # -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at +# Copyright (c) 2023, NVIDIA CORPORATION. 
# -# http://www.apache.org/licenses/LICENSE-2.0 +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from __future__ import absolute_import, division, print_function + import sys + import numpy as np diff --git a/cpp/bench/ann/scripts/hdf5_to_fbin.py b/python/raft-ann-bench/src/raft-ann-bench/get_dataset/hdf5_to_fbin.py similarity index 78% rename from cpp/bench/ann/scripts/hdf5_to_fbin.py rename to python/raft-ann-bench/src/raft-ann-bench/get_dataset/hdf5_to_fbin.py index cfeb184ea8..ba853c63f5 100755 --- a/cpp/bench/ann/scripts/hdf5_to_fbin.py +++ b/python/raft-ann-bench/src/raft-ann-bench/get_dataset/hdf5_to_fbin.py @@ -1,19 +1,23 @@ -# ============================================================================= -# Copyright (c) 2020-2023, NVIDIA CORPORATION. # -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at +# Copyright (c) 2023, NVIDIA CORPORATION. 
# -# http://www.apache.org/licenses/LICENSE-2.0 +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import sys -import numpy as np + import h5py +import numpy as np def normalize(x): @@ -65,7 +69,8 @@ def write_bin(fname, data): query = normalize(query) elif hdf5.attrs["distance"] == "angular": print( - "warning: input has angular distance, specify -n to normalize base/query set!\n" + "warning: input has angular distance, ", + "specify -n to normalize base/query set!\n", ) output_fname = fname_prefix + ".base.fbin" diff --git a/bench/ann/plot.py b/python/raft-ann-bench/src/raft-ann-bench/plot/__main__.py similarity index 60% rename from bench/ann/plot.py rename to python/raft-ann-bench/src/raft-ann-bench/plot/__main__.py index ff7cb29b4a..198d0a2b14 100644 --- a/bench/ann/plot.py +++ b/python/raft-ann-bench/src/raft-ann-bench/plot/__main__.py @@ -13,24 +13,23 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-# This script is inspired by +# This script is inspired by # 1: https://github.com/erikbern/ann-benchmarks/blob/main/plot.py -# 2: https://github.com/erikbern/ann-benchmarks/blob/main/ann_benchmarks/plotting/utils.py -# 3: https://github.com/erikbern/ann-benchmarks/blob/main/ann_benchmarks/plotting/metrics.py +# 2: https://github.com/erikbern/ann-benchmarks/blob/main/ann_benchmarks/plotting/utils.py # noqa: E501 +# 3: https://github.com/erikbern/ann-benchmarks/blob/main/ann_benchmarks/plotting/metrics.py # noqa: E501 # Licence: https://github.com/erikbern/ann-benchmarks/blob/main/LICENSE -import matplotlib as mpl - -mpl.use("Agg") # noqa import argparse -from collections import OrderedDict import itertools +import os +from collections import OrderedDict + +import matplotlib as mpl import matplotlib.pyplot as plt import numpy as np import pandas as pd -import os - +mpl.use("Agg") metrics = { "k-nn": { @@ -41,16 +40,19 @@ "qps": { "description": "Queries per second (1/s)", "worst": float("-inf"), - } + }, } + def positive_int(input_str: str) -> int: try: i = int(input_str) if i < 1: raise ValueError except ValueError: - raise argparse.ArgumentTypeError(f"{input_str} is not a positive integer") + raise argparse.ArgumentTypeError( + f"{input_str} is not a positive integer" + ) return i @@ -63,17 +65,36 @@ def euclidean(a, b): return sum((x - y) ** 2 for x, y in zip(a, b)) while len(colors) < n: - new_color = max(itertools.product(vs, vs, vs), key=lambda a: min(euclidean(a, b) for b in colors)) + new_color = max( + itertools.product(vs, vs, vs), + key=lambda a: min(euclidean(a, b) for b in colors), + ) colors.append(new_color + (1.0,)) return colors def create_linestyles(unique_algorithms): - colors = dict(zip(unique_algorithms, generate_n_colors(len(unique_algorithms)))) - linestyles = dict((algo, ["--", "-.", "-", ":"][i % 4]) for i, algo in enumerate(unique_algorithms)) - markerstyles = dict((algo, ["+", "<", "o", "*", "x"][i % 5]) for i, algo in 
enumerate(unique_algorithms)) - faded = dict((algo, (r, g, b, 0.3)) for algo, (r, g, b, a) in colors.items()) - return dict((algo, (colors[algo], faded[algo], linestyles[algo], markerstyles[algo])) for algo in unique_algorithms) + colors = dict( + zip(unique_algorithms, generate_n_colors(len(unique_algorithms))) + ) + linestyles = dict( + (algo, ["--", "-.", "-", ":"][i % 4]) + for i, algo in enumerate(unique_algorithms) + ) + markerstyles = dict( + (algo, ["+", "<", "o", "*", "x"][i % 5]) + for i, algo in enumerate(unique_algorithms) + ) + faded = dict( + (algo, (r, g, b, 0.3)) for algo, (r, g, b, a) in colors.items() + ) + return dict( + ( + algo, + (colors[algo], faded[algo], linestyles[algo], markerstyles[algo]), + ) + for algo in unique_algorithms + ) def get_up_down(metric): @@ -98,7 +119,9 @@ def create_pointset(data, xn, yn): # Generate Pareto frontier xs, ys, ls, idxs = [], [], [], [] last_x = xm["worst"] - comparator = (lambda xv, lx: xv > lx) if last_x < 0 else (lambda xv, lx: xv < lx) + comparator = ( + (lambda xv, lx: xv > lx) if last_x < 0 else (lambda xv, lx: xv < lx) + ) for algo_name, index_name, xv, yv in data: if not xv or not yv: continue @@ -115,8 +138,9 @@ def create_pointset(data, xn, yn): return xs, ys, ls, idxs, axs, ays, als, aidxs -def create_plot_search(all_data, raw, x_scale, y_scale, fn_out, linestyles, - dataset, k, batch_size): +def create_plot_search( + all_data, raw, x_scale, y_scale, fn_out, linestyles, dataset, k, batch_size +): xn = "k-nn" yn = "qps" xm, ym = (metrics[xn], metrics[yn]) @@ -127,23 +151,43 @@ def create_plot_search(all_data, raw, x_scale, y_scale, fn_out, linestyles, # Sorting by mean y-value helps aligning plots with labels def mean_y(algo): - xs, ys, ls, idxs, axs, ays, als, aidxs = create_pointset(all_data[algo], xn, yn) + xs, ys, ls, idxs, axs, ays, als, aidxs = create_pointset( + all_data[algo], xn, yn + ) return -np.log(np.array(ys)).mean() # Find range for logit x-scale min_x, max_x = 1, 0 for algo in 
sorted(all_data.keys(), key=mean_y): - xs, ys, ls, idxs, axs, ays, als, aidxs = create_pointset(all_data[algo], xn, yn) + xs, ys, ls, idxs, axs, ays, als, aidxs = create_pointset( + all_data[algo], xn, yn + ) min_x = min([min_x] + [x for x in xs if x > 0]) max_x = max([max_x] + [x for x in xs if x < 1]) color, faded, linestyle, marker = linestyles[algo] (handle,) = plt.plot( - xs, ys, "-", label=algo, color=color, ms=7, mew=3, lw=3, marker=marker + xs, + ys, + "-", + label=algo, + color=color, + ms=7, + mew=3, + lw=3, + marker=marker, ) handles.append(handle) if raw: (handle2,) = plt.plot( - axs, ays, "-", label=algo, color=faded, ms=5, mew=2, lw=2, marker=marker + axs, + ays, + "-", + label=algo, + color=faded, + ms=5, + mew=2, + lw=2, + marker=marker, ) labels.append(algo) @@ -177,7 +221,13 @@ def inv_fun(x): ax.set_title(f"{dataset} k={k} batch_size={batch_size}") plt.gca().get_position() # plt.gca().set_position([box.x0, box.y0, box.width * 0.8, box.height]) - ax.legend(handles, labels, loc="center left", bbox_to_anchor=(1, 0.5), prop={"size": 9}) + ax.legend( + handles, + labels, + loc="center left", + bbox_to_anchor=(1, 0.5), + prop={"size": 9}, + ) plt.grid(visible=True, which="major", color="0.65", linestyle="-") plt.setp(ax.get_xminorticklabels(), visible=True) @@ -198,33 +248,38 @@ def inv_fun(x): plt.close() -def create_plot_build(build_results, search_results, linestyles, fn_out, - dataset, k, batch_size): +def create_plot_build( + build_results, search_results, linestyles, fn_out, dataset, k, batch_size +): xn = "k-nn" yn = "qps" + # recall_85 = [-1] * len(linestyles) qps_85 = [-1] * len(linestyles) bt_85 = [0] * len(linestyles) i_85 = [-1] * len(linestyles) - + # recall_90 = [-1] * len(linestyles) qps_90 = [-1] * len(linestyles) bt_90 = [0] * len(linestyles) i_90 = [-1] * len(linestyles) - + # recall_95 = [-1] * len(linestyles) qps_95 = [-1] * len(linestyles) bt_95 = [0] * len(linestyles) i_95 = [-1] * len(linestyles) - data = OrderedDict() colors = 
OrderedDict() # Sorting by mean y-value helps aligning plots with labels def mean_y(algo): - xs, ys, ls, idxs, axs, ays, als, aidxs = create_pointset(search_results[algo], xn, yn) + xs, ys, ls, idxs, axs, ays, als, aidxs = create_pointset( + search_results[algo], xn, yn + ) return -np.log(np.array(ys)).mean() for pos, algo in enumerate(sorted(search_results.keys(), key=mean_y)): - xs, ys, ls, idxs, axs, ays, als, aidxs = create_pointset(search_results[algo], xn, yn) + xs, ys, ls, idxs, axs, ays, als, aidxs = create_pointset( + search_results[algo], xn, yn + ) # x is recall, y is qps, ls is algo_name, idxs is index_name for i in range(len(xs)): if xs[i] >= 0.85 and xs[i] < 0.9 and ys[i] > qps_85[pos]: @@ -241,14 +296,14 @@ def mean_y(algo): i_95[pos] = idxs[i] data[algo] = [bt_85[pos], bt_90[pos], bt_95[pos]] colors[algo] = linestyles[algo][0] - - index = ['@85% Recall', '@90% Recall', '@95% Recall'] - + + index = ["@85% Recall", "@90% Recall", "@95% Recall"] + df = pd.DataFrame(data, index=index) plt.figure(figsize=(12, 9)) ax = df.plot.bar(rot=0, color=colors) fig = ax.get_figure() - print(f"writing build output to {fn_out}") + print(f"writing search output to {fn_out}") plt.title("Build Time for Highest QPS") plt.suptitle(f"{dataset} k={k} batch_size={batch_size}") plt.ylabel("Build Time (s)") @@ -258,22 +313,35 @@ def mean_y(algo): def load_lines(results_path, result_files, method, index_key): results = dict() + linebreaker = "name,iterations" + for result_filename in result_files: - if result_filename.endswith('.csv'): - with open(os.path.join(results_path, result_filename), 'r') as f: + if result_filename.endswith(".csv"): + with open(os.path.join(results_path, result_filename), "r") as f: lines = f.readlines() lines = lines[:-1] if lines[-1] == "\n" else lines - + idx = 0 + for pos, line in enumerate(lines): + if linebreaker in line: + idx = pos + break + if method == "build": - key_idx = [2] + if "hnswlib" in result_filename: + key_idx = [2] + else: + 
key_idx = [10] elif method == "search": - key_idx = [2, 3] + if "hnswlib" in result_filename: + key_idx = [10, 6] + else: + key_idx = [12, 10] - for line in lines[1:]: - split_lines = line.split(',') + for line in lines[idx + 1 :]: + split_lines = line.split(",") - algo_name = split_lines[0] - index_name = split_lines[1] + algo_name = split_lines[0].split(".")[0].strip('"') + index_name = split_lines[0].split("/")[0].strip('"') if index_key == "algo": dict_key = algo_name @@ -289,14 +357,22 @@ def load_lines(results_path, result_files, method, index_key): return results -def load_all_results(dataset_path, algorithms, k, batch_size, method, index_key): +def load_all_results( + dataset_path, algorithms, k, batch_size, method, index_key +): results_path = os.path.join(dataset_path, "result", method) result_files = os.listdir(results_path) - result_files = [result_filename for result_filename in result_files \ - if f"{k}-{batch_size}" in result_filename] + result_files = [ + result_filename + for result_filename in result_files + if f"{k}-{batch_size}" in result_filename + ] if len(algorithms) > 0: - result_files = [result_filename for result_filename in result_files if \ - result_filename.split('-')[0] in algorithms] + result_files = [ + result_filename + for result_filename in result_files + if result_filename.split("-")[0] in algorithms + ] results = load_lines(results_path, result_files, method, index_key) @@ -304,39 +380,57 @@ def load_all_results(dataset_path, algorithms, k, batch_size, method, index_key) def main(): + call_path = os.getcwd() + if "RAPIDS_DATASET_ROOT_DIR" in os.environ: + default_dataset_path = os.getenv("RAPIDS_DATASET_ROOT_DIR") + else: + default_dataset_path = os.path.join(call_path, "datasets/") + parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument("--dataset", help="dataset to download", - default="glove-100-inner") - parser.add_argument("--dataset-path", help="path to dataset 
folder", - default=os.path.join(os.getenv("RAFT_HOME"), - "bench", "ann", "data")) - parser.add_argument("--output-filepath", - help="directory for PNG to be saved", - default=os.getcwd()) - parser.add_argument("--algorithms", - help="plot only comma separated list of named \ - algorithms", - default=None) + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) parser.add_argument( - "-k", "--count", default=10, type=positive_int, help="the number of nearest neighbors to search for" + "--dataset", help="dataset to download", default="glove-100-inner" ) parser.add_argument( - "-bs", "--batch-size", default=10000, type=positive_int, help="number of query vectors to use in each query trial" + "--dataset-path", + help="path to dataset folder, by default will look in " + "RAPIDS_DATASET_ROOT_DIR if defined, otherwise a datasets " + "subdirectory from the calling directory", + default=default_dataset_path, ) parser.add_argument( - "--build", - action="store_true" + "--output-filepath", + help="directory for PNG to be saved", + default=os.getcwd(), ) parser.add_argument( - "--search", - action="store_true" + "--algorithms", + help="plot only comma separated list of named \ + algorithms", + default=None, + ) + parser.add_argument( + "-k", + "--count", + default=10, + type=positive_int, + help="the number of nearest neighbors to search for", ) + parser.add_argument( + "-bs", + "--batch-size", + default=10000, + type=positive_int, + help="number of query vectors to use in each query trial", + ) + parser.add_argument("--build", action="store_true") + parser.add_argument("--search", action="store_true") parser.add_argument( "--x-scale", help="Scale to use when drawing the X-axis. 
\ - Typically linear, logit or a2", - default="linear" + Typically linear, logit or a2", + default="linear", ) parser.add_argument( "--y-scale", @@ -345,13 +439,15 @@ def main(): default="linear", ) parser.add_argument( - "--raw", help="Show raw results (not just Pareto frontier) in faded colours", action="store_true" + "--raw", + help="Show raw results (not just Pareto frontier) in faded colours", + action="store_true", ) args = parser.parse_args() if args.algorithms: - algorithms = args.algorithms.split(',') + algorithms = args.algorithms.split(",") else: algorithms = [] k = args.count @@ -363,22 +459,52 @@ def main(): build = args.build search = args.search - search_output_filepath = os.path.join(args.output_filepath, f"search-{args.dataset}-k{k}-batch_size{batch_size}.png") - build_output_filepath = os.path.join(args.output_filepath, f"build-{args.dataset}-k{k}-batch_size{batch_size}.png") + search_output_filepath = os.path.join( + args.output_filepath, f"search-{args.dataset}-{k}-{batch_size}.png" + ) + build_output_filepath = os.path.join( + args.output_filepath, f"build-{args.dataset}-{k}-{batch_size}.png" + ) search_results = load_all_results( - os.path.join(args.dataset_path, args.dataset), - algorithms, k, batch_size, "search", "algo") + os.path.join(args.dataset_path, args.dataset), + algorithms, + k, + batch_size, + "search", + "algo", + ) linestyles = create_linestyles(sorted(search_results.keys())) if search: - create_plot_search(search_results, args.raw, args.x_scale, args.y_scale, - search_output_filepath, linestyles, args.dataset, k, batch_size) + create_plot_search( + search_results, + args.raw, + args.x_scale, + args.y_scale, + search_output_filepath, + linestyles, + args.dataset, + k, + batch_size, + ) if build: build_results = load_all_results( os.path.join(args.dataset_path, args.dataset), - algorithms, k, batch_size, "build", "index") - create_plot_build(build_results, search_results, linestyles, build_output_filepath, - args.dataset, k, 
batch_size) + algorithms, + k, + batch_size, + "build", + "index", + ) + create_plot_build( + build_results, + search_results, + linestyles, + build_output_filepath, + args.dataset, + k, + batch_size, + ) if __name__ == "__main__": diff --git a/bench/ann/run.py b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py similarity index 52% rename from bench/ann/run.py rename to python/raft-ann-bench/src/raft-ann-bench/run/__main__.py index 2da966cbcd..d5a65ddfb7 100644 --- a/bench/ann/run.py +++ b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py @@ -17,6 +17,7 @@ import json import os import subprocess + import yaml @@ -26,31 +27,58 @@ def positive_int(input_str: str) -> int: if i < 1: raise ValueError except ValueError: - raise argparse.ArgumentTypeError(f"{input_str} is not a positive integer") + raise argparse.ArgumentTypeError( + f"{input_str} is not a positive integer" + ) return i -def validate_algorithm(algos_conf, algo): + +def validate_algorithm(algos_conf, algo, gpu_present): algos_conf_keys = set(algos_conf.keys()) - return algo in algos_conf_keys and not algos_conf[algo]["disabled"] + if gpu_present: + return algo in algos_conf_keys + else: + return ( + algo in algos_conf_keys + and algos_conf[algo]["requires_gpu"] is False + ) def find_executable(algos_conf, algo, k, batch_size): executable = algos_conf[algo]["executable"] - conda_path = os.path.join(os.getenv("CONDA_PREFIX"), "bin", "ann", - executable) - build_path = os.path.join(os.getenv("RAFT_HOME"), "cpp", "build", executable) - if os.path.exists(conda_path): - return (executable, conda_path, f"{algo}-k{k}-batch_size{batch_size}") - elif os.path.exists(build_path): - return (executable, build_path, f"{algo}-k{k}-batch_size{batch_size}") + + build_path = os.getenv("RAFT_HOME") + if build_path is not None: + build_path = os.path.join(build_path, "cpp", "build", executable) + if os.path.exists(build_path): + print(f"-- Using RAFT bench from repository in {build_path}. 
") + return (executable, build_path, f"{algo}-{k}-{batch_size}") + + # if there is no build folder present, we look in the conda environment + conda_path = os.getenv("CONDA_PREFIX") + if conda_path is not None: + conda_path = os.path.join(conda_path, "bin", "ann", executable) + if os.path.exists(conda_path): + print("-- Using RAFT bench found in conda environment. ") + return (executable, conda_path, f"{algo}-{k}-{batch_size}") + else: raise FileNotFoundError(executable) -def run_build_and_search(conf_file, conf_filename, conf_filedir, - executables_to_run, dataset_path, force, - build, search, k, batch_size): +def run_build_and_search( + conf_file, + conf_filename, + conf_filedir, + executables_to_run, + dataset_path, + force, + build, + search, + k, + batch_size, +): for executable, ann_executable_path, algo in executables_to_run.keys(): # Need to write temporary configuration temp_conf_filename = f"temporary_{conf_filename}" @@ -59,21 +87,26 @@ def run_build_and_search(conf_file, conf_filename, conf_filedir, temp_conf = dict() temp_conf["dataset"] = conf_file["dataset"] temp_conf["search_basic_param"] = conf_file["search_basic_param"] - temp_conf["index"] = executables_to_run[(executable, - ann_executable_path, - algo)]["index"] + temp_conf["index"] = executables_to_run[ + (executable, ann_executable_path, algo) + ]["index"] json.dump(temp_conf, f) - legacy_result_folder = os.path.join(dataset_path, conf_file['dataset']['name'], 'result') + legacy_result_folder = os.path.join( + dataset_path, conf_file["dataset"]["name"], "result" + ) os.makedirs(legacy_result_folder, exist_ok=True) if build: build_folder = os.path.join(legacy_result_folder, "build") os.makedirs(build_folder, exist_ok=True) - cmd = [ann_executable_path, - "--build", - "--data_prefix="+dataset_path, - "--benchmark_out_format=json", - f"--benchmark_out={os.path.join(build_folder, f'{algo}.json')}"] + cmd = [ + ann_executable_path, + "--build", + "--data_prefix=" + dataset_path, + 
"--benchmark_out_format=json", + "--benchmark_out=" + + f"{os.path.join(build_folder, f'{algo}.json')}", + ] if force: cmd = cmd + ["--overwrite"] cmd = cmd + [temp_conf_filepath] @@ -84,14 +117,18 @@ def run_build_and_search(conf_file, conf_filename, conf_filedir, if search: search_folder = os.path.join(legacy_result_folder, "search") os.makedirs(search_folder, exist_ok=True) - cmd = [ann_executable_path, - "--search", - "--data_prefix="+dataset_path, - "--benchmark_counters_tabular", - "--override_kv=k:%s" % k, - "--override_kv=n_queries:%s" % batch_size, - "--benchmark_out_format=json", - f"--benchmark_out={os.path.join(search_folder, f'{algo}.json')}"] + cmd = [ + ann_executable_path, + "--search", + "--data_prefix=" + dataset_path, + "--benchmark_counters_tabular", + "--override_kv=k:%s" % k, + "--override_kv=n_queries:%s" % batch_size, + "--benchmark_min_warmup_time=0.01", + "--benchmark_out_format=json", + "--benchmark_out=" + + f"{os.path.join(search_folder, f'{algo}.json')}", + ] if force: cmd = cmd + ["--overwrite"] cmd = cmd + [temp_conf_filepath] @@ -104,18 +141,41 @@ def run_build_and_search(conf_file, conf_filename, conf_filedir, def main(): scripts_path = os.path.dirname(os.path.realpath(__file__)) + call_path = os.getcwd() + # Read list of allowed algorithms + try: + import pylibraft # noqa: F401 + + gpu_present = True + except ImportError: + gpu_present = False + with open(f"{scripts_path}/algos.yaml", "r") as f: algos_conf = yaml.safe_load(f) + if "RAPIDS_DATASET_ROOT_DIR" in os.environ: + default_dataset_path = os.getenv("RAPIDS_DATASET_ROOT_DIR") + else: + default_dataset_path = os.path.join(call_path, "datasets/") + parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter) + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) parser.add_argument( - "-k", "--count", default=10, type=positive_int, help="the number of nearest neighbors to search for" + "-k", + "--count", + default=10, + type=positive_int, + 
help="the number of nearest neighbors to search for", ) parser.add_argument( - "-bs", "--batch-size", default=10000, type=positive_int, help="number of query vectors to use in each query trial" + "-bs", + "--batch-size", + default=10000, + type=positive_int, + help="number of query vectors to use in each query trial", ) parser.add_argument( "--configuration", @@ -124,34 +184,36 @@ def main(): parser.add_argument( "--dataset", help="dataset whose configuration file will be used", - default="glove-100-inner" + default="glove-100-inner", ) parser.add_argument( "--dataset-path", - help="path to dataset folder", - default=os.path.join(os.getenv("RAFT_HOME"), - "bench", "ann", "data") + help="path to dataset folder, by default will look in " + "RAPIDS_DATASET_ROOT_DIR if defined, otherwise a datasets " + "subdirectory from the calling directory", + default=default_dataset_path, ) + parser.add_argument("--build", action="store_true") + parser.add_argument("--search", action="store_true") parser.add_argument( - "--build", - action="store_true" + "--algorithms", + help="run only comma separated list of named \ + algorithms", + default=None, ) parser.add_argument( - "--search", - action="store_true" - ) - parser.add_argument("--algorithms", - help="run only comma separated list of named \ - algorithms", - default=None) - parser.add_argument("--indices", - help="run only comma separated list of named indices. \ + "--indices", + help="run only comma separated list of named indices. 
\ parameter `algorithms` is ignored", - default=None) - parser.add_argument("-f", "--force", - help="re-run algorithms even if their results \ + default=None, + ) + parser.add_argument( + "-f", + "--force", + help="re-run algorithms even if their results \ already exist", - action="store_true") + action="store_true", + ) args = parser.parse_args() @@ -171,11 +233,14 @@ def main(): if args.configuration: conf_filepath = args.configuration elif args.dataset: - conf_filepath = \ - os.path.join(scripts_path, "conf", f"{args.dataset}.json") + conf_filepath = os.path.join( + scripts_path, "conf", f"{args.dataset}.json" + ) else: - raise ValueError("One of parameters `configuration` or \ - `dataset` need to be provided") + raise ValueError( + "One of parameters `configuration` or \ + `dataset` need to be provided" + ) conf_filename = conf_filepath.split("/")[-1] conf_filedir = "/".join(conf_filepath.split("/")[:-1]) dataset_name = conf_filename.replace(".json", "") @@ -196,9 +261,12 @@ def main(): # and enabled for index in conf_file["index"]: curr_algo = index["algo"] - if index["name"] in indices and \ - validate_algorithm(algos_conf, curr_algo): - executable_path = find_executable(algos_conf, curr_algo, k, batch_size) + if index["name"] in indices and validate_algorithm( + algos_conf, curr_algo, gpu_present + ): + executable_path = find_executable( + algos_conf, curr_algo, k, batch_size + ) if executable_path not in executables_to_run: executables_to_run[executable_path] = {"index": []} executables_to_run[executable_path]["index"].append(index) @@ -210,9 +278,12 @@ def main(): # and are enabled in algos.yaml for index in conf_file["index"]: curr_algo = index["algo"] - if curr_algo in algorithms and \ - validate_algorithm(algos_conf, curr_algo): - executable_path = find_executable(algos_conf, curr_algo, k, batch_size) + if curr_algo in algorithms and validate_algorithm( + algos_conf, curr_algo, gpu_present + ): + executable_path = find_executable( + algos_conf, 
curr_algo, k, batch_size + ) if executable_path not in executables_to_run: executables_to_run[executable_path] = {"index": []} executables_to_run[executable_path]["index"].append(index) @@ -221,22 +292,36 @@ def main(): else: for index in conf_file["index"]: curr_algo = index["algo"] - if validate_algorithm(algos_conf, curr_algo): - executable_path = find_executable(algos_conf, curr_algo, k, batch_size) + if validate_algorithm(algos_conf, curr_algo, gpu_present): + executable_path = find_executable( + algos_conf, curr_algo, k, batch_size + ) if executable_path not in executables_to_run: executables_to_run[executable_path] = {"index": []} executables_to_run[executable_path]["index"].append(index) # Replace index to dataset path for executable_path in executables_to_run: - for pos, index in enumerate(executables_to_run[executable_path]["index"]): - index["file"] = os.path.join(dataset_path, dataset_name, "index", index["name"]) + for pos, index in enumerate( + executables_to_run[executable_path]["index"] + ): + index["file"] = os.path.join( + dataset_path, dataset_name, "index", index["name"] + ) executables_to_run[executable_path]["index"][pos] = index - run_build_and_search(conf_file, conf_filename, conf_filedir, - executables_to_run, dataset_path, - args.force, build, search, - k, batch_size) + run_build_and_search( + conf_file, + conf_filename, + conf_filedir, + executables_to_run, + dataset_path, + args.force, + build, + search, + k, + batch_size, + ) if __name__ == "__main__": diff --git a/bench/ann/algos.yaml b/python/raft-ann-bench/src/raft-ann-bench/run/algos.yaml similarity index 70% rename from bench/ann/algos.yaml rename to python/raft-ann-bench/src/raft-ann-bench/run/algos.yaml index 5f554fc46b..30abe0dda6 100644 --- a/bench/ann/algos.yaml +++ b/python/raft-ann-bench/src/raft-ann-bench/run/algos.yaml @@ -1,30 +1,30 @@ faiss_gpu_ivf_flat: executable: FAISS_IVF_FLAT_ANN_BENCH - disabled: false + requires_gpu: true faiss_gpu_flat: executable: 
FAISS_IVF_FLAT_ANN_BENCH - disabled: false + requires_gpu: true faiss_gpu_ivf_pq: executable: FAISS_IVF_PQ_ANN_BENCH - disabled: false + requires_gpu: true faiss_gpu_ivf_sq: executable: FAISS_IVF_PQ_ANN_BENCH - disabled: false + requires_gpu: true faiss_gpu_bfknn: executable: FAISS_BFKNN_ANN_BENCH - disabled: false + requires_gpu: true raft_ivf_flat: executable: RAFT_IVF_FLAT_ANN_BENCH - disabled: false + requires_gpu: true raft_ivf_pq: executable: RAFT_IVF_PQ_ANN_BENCH - disabled: false + requires_gpu: true raft_cagra: executable: RAFT_CAGRA_ANN_BENCH - disabled: false + requires_gpu: true ggnn: executable: GGNN_ANN_BENCH - disabled: false + requires_gpu: true hnswlib: executable: HNSWLIB_ANN_BENCH - disabled: false \ No newline at end of file + requires_gpu: false diff --git a/bench/ann/conf/bigann-100M.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/bigann-100M.json similarity index 100% rename from bench/ann/conf/bigann-100M.json rename to python/raft-ann-bench/src/raft-ann-bench/run/conf/bigann-100M.json diff --git a/bench/ann/conf/deep-100M.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-100M.json similarity index 100% rename from bench/ann/conf/deep-100M.json rename to python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-100M.json diff --git a/bench/ann/conf/deep-1B.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-1B.json similarity index 100% rename from bench/ann/conf/deep-1B.json rename to python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-1B.json diff --git a/bench/ann/conf/deep-image-96-angular.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-image-96-inner.json similarity index 77% rename from bench/ann/conf/deep-image-96-angular.json rename to python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-image-96-inner.json index 0724d8b09f..f1c033e415 100644 --- a/bench/ann/conf/deep-image-96-angular.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-image-96-inner.json @@ -1,8 +1,8 @@ { 
"dataset": { - "name": "deep-image-96-angular", - "base_file": "data/deep-image-96-angular/base.fbin", - "query_file": "data/deep-image-96-angular/query.fbin", + "name": "deep-image-96-inner", + "base_file": "deep-image-96-inner/base.fbin", + "query_file": "deep-image-96-inner/query.fbin", "distance": "euclidean" }, "search_basic_param": { @@ -15,7 +15,7 @@ "name" : "hnswlib.M12", "algo" : "hnswlib", "build_param": {"M":12, "efConstruction":500, "numThreads":32}, - "file" : "index/deep-image-96-angular/hnswlib/M12", + "file" : "index/deep-image-96-inner/hnswlib/M12", "search_params" : [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, @@ -28,13 +28,13 @@ {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} ], - "search_result_file" : "result/deep-image-96-angular/hnswlib/M12" + "search_result_file" : "result/deep-image-96-inner/hnswlib/M12" }, { "name" : "hnswlib.M16", "algo" : "hnswlib", "build_param": {"M":16, "efConstruction":500, "numThreads":32}, - "file" : "index/deep-image-96-angular/hnswlib/M16", + "file" : "index/deep-image-96-inner/hnswlib/M16", "search_params" : [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, @@ -47,13 +47,13 @@ {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} ], - "search_result_file" : "result/deep-image-96-angular/hnswlib/M16" + "search_result_file" : "result/deep-image-96-inner/hnswlib/M16" }, { "name" : "hnswlib.M24", "algo" : "hnswlib", "build_param": {"M":24, "efConstruction":500, "numThreads":32}, - "file" : "index/deep-image-96-angular/hnswlib/M24", + "file" : "index/deep-image-96-inner/hnswlib/M24", "search_params" : [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, @@ -66,13 +66,13 @@ {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} ], - "search_result_file" : "result/deep-image-96-angular/hnswlib/M24" + "search_result_file" : "result/deep-image-96-inner/hnswlib/M24" }, { "name" : "hnswlib.M36", "algo" : "hnswlib", "build_param": {"M":36, "efConstruction":500, "numThreads":32}, - "file" : 
"index/deep-image-96-angular/hnswlib/M36", + "file" : "index/deep-image-96-inner/hnswlib/M36", "search_params" : [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, @@ -85,7 +85,7 @@ {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} ], - "search_result_file" : "result/deep-image-96-angular/hnswlib/M36" + "search_result_file" : "result/deep-image-96-inner/hnswlib/M36" }, @@ -96,13 +96,13 @@ "algo": "raft_bfknn", "build_param": {}, - "file": "index/deep-image-96-angular/raft_bfknn/bfknn", + "file": "index/deep-image-96-inner/raft_bfknn/bfknn", "search_params": [ { "probe": 1 } ], - "search_result_file": "result/deep-image-96-angular/raft_bfknn/bfknn" + "search_result_file": "result/deep-image-96-inner/raft_bfknn/bfknn" }, { "name": "faiss_ivf_flat.nlist1024", @@ -110,7 +110,7 @@ "build_param": { "nlist": 1024 }, - "file": "index/deep-image-96-angular/faiss_ivf_flat/nlist1024", + "file": "index/deep-image-96-inner/faiss_ivf_flat/nlist1024", "search_params": [ { "nprobe": 1 @@ -137,7 +137,7 @@ "nprobe": 1000 } ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_flat/nlist1024" + "search_result_file": "result/deep-image-96-inner/faiss_ivf_flat/nlist1024" }, { "name": "faiss_ivf_flat.nlist2048", @@ -145,7 +145,7 @@ "build_param": { "nlist": 2048 }, - "file": "index/deep-image-96-angular/faiss_ivf_flat/nlist2048", + "file": "index/deep-image-96-inner/faiss_ivf_flat/nlist2048", "search_params": [ { "nprobe": 1 @@ -172,7 +172,7 @@ "nprobe": 1000 } ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_flat/nlist2048" + "search_result_file": "result/deep-image-96-inner/faiss_ivf_flat/nlist2048" }, { "name": "faiss_ivf_flat.nlist4096", @@ -180,7 +180,7 @@ "build_param": { "nlist": 4096 }, - "file": "index/deep-image-96-angular/faiss_ivf_flat/nlist4096", + "file": "index/deep-image-96-inner/faiss_ivf_flat/nlist4096", "search_params": [ { "nprobe": 1 @@ -207,7 +207,7 @@ "nprobe": 1000 } ], - "search_result_file": 
"result/deep-image-96-angular/faiss_ivf_flat/nlist4096" + "search_result_file": "result/deep-image-96-inner/faiss_ivf_flat/nlist4096" }, { "name": "faiss_ivf_flat.nlist8192", @@ -215,7 +215,7 @@ "build_param": { "nlist": 8192 }, - "file": "index/deep-image-96-angular/faiss_ivf_flat/nlist8192", + "file": "index/deep-image-96-inner/faiss_ivf_flat/nlist8192", "search_params": [ { "nprobe": 1 @@ -242,7 +242,7 @@ "nprobe": 1000 } ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_flat/nlist8192" + "search_result_file": "result/deep-image-96-inner/faiss_ivf_flat/nlist8192" }, { "name": "faiss_ivf_flat.nlist16384", @@ -250,7 +250,7 @@ "build_param": { "nlist": 16384 }, - "file": "index/deep-image-96-angular/faiss_ivf_flat/nlist16384", + "file": "index/deep-image-96-inner/faiss_ivf_flat/nlist16384", "search_params": [ { "nprobe": 1 @@ -280,7 +280,7 @@ "nprobe": 2000 } ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_flat/nlist16384" + "search_result_file": "result/deep-image-96-inner/faiss_ivf_flat/nlist16384" }, { "name": "faiss_ivf_pq.M64-nlist1024", @@ -291,7 +291,7 @@ "useFloat16": true, "usePrecomputed": true }, - "file": "index/deep-image-96-angular/faiss_ivf_pq/M64-nlist1024", + "file": "index/deep-image-96-inner/faiss_ivf_pq/M64-nlist1024", "search_params": [ {"nprobe": 10}, {"nprobe": 50}, @@ -300,7 +300,7 @@ {"nprobe": 500}, {"nprobe": 1000} ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_pq/M64-nlist1024" + "search_result_file": "result/deep-image-96-inner/faiss_ivf_pq/M64-nlist1024" }, { "name": "faiss_ivf_pq.M64-nlist1024.noprecomp", @@ -311,7 +311,7 @@ "useFloat16": true, "usePrecomputed": false }, - "file": "index/deep-image-96-angular/faiss_ivf_pq/M64-nlist1024.noprecomp", + "file": "index/deep-image-96-inner/faiss_ivf_pq/M64-nlist1024.noprecomp", "search_params": [ {"nprobe": 10}, {"nprobe": 50}, @@ -320,7 +320,7 @@ {"nprobe": 500}, {"nprobe": 1000} ], - "search_result_file": 
"result/deep-image-96-angular/faiss_ivf_pq/M64-nlist1024" + "search_result_file": "result/deep-image-96-inner/faiss_ivf_pq/M64-nlist1024" }, { "name": "faiss_ivf_sq.nlist1024-fp16", @@ -329,7 +329,7 @@ "nlist": 1024, "quantizer_type": "fp16" }, - "file": "index/deep-image-96-angular/faiss_ivf_sq/nlist1024-fp16", + "file": "index/deep-image-96-inner/faiss_ivf_sq/nlist1024-fp16", "search_params": [ { "nprobe": 1 @@ -356,7 +356,7 @@ "nprobe": 1000 } ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_sq/nlist1024-fp16" + "search_result_file": "result/deep-image-96-inner/faiss_ivf_sq/nlist1024-fp16" }, { "name": "faiss_ivf_sq.nlist2048-fp16", @@ -365,7 +365,7 @@ "nlist": 2048, "quantizer_type": "fp16" }, - "file": "index/deep-image-96-angular/faiss_ivf_sq/nlist2048-fp16", + "file": "index/deep-image-96-inner/faiss_ivf_sq/nlist2048-fp16", "search_params": [ { "nprobe": 1 @@ -392,7 +392,7 @@ "nprobe": 1000 } ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_sq/nlist2048-fp16" + "search_result_file": "result/deep-image-96-inner/faiss_ivf_sq/nlist2048-fp16" }, { "name": "faiss_ivf_sq.nlist4096-fp16", @@ -401,7 +401,7 @@ "nlist": 4096, "quantizer_type": "fp16" }, - "file": "index/deep-image-96-angular/faiss_ivf_sq/nlist4096-fp16", + "file": "index/deep-image-96-inner/faiss_ivf_sq/nlist4096-fp16", "search_params": [ { "nprobe": 1 @@ -428,7 +428,7 @@ "nprobe": 1000 } ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_sq/nlist4096-fp16" + "search_result_file": "result/deep-image-96-inner/faiss_ivf_sq/nlist4096-fp16" }, { "name": "faiss_ivf_sq.nlist8192-fp16", @@ -437,7 +437,7 @@ "nlist": 8192, "quantizer_type": "fp16" }, - "file": "index/deep-image-96-angular/faiss_ivf_sq/nlist8192-fp16", + "file": "index/deep-image-96-inner/faiss_ivf_sq/nlist8192-fp16", "search_params": [ { "nprobe": 1 @@ -464,7 +464,7 @@ "nprobe": 1000 } ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_sq/nlist8192-fp16" + 
"search_result_file": "result/deep-image-96-inner/faiss_ivf_sq/nlist8192-fp16" }, { "name": "faiss_ivf_sq.nlist16384-fp16", @@ -473,7 +473,7 @@ "nlist": 16384, "quantizer_type": "fp16" }, - "file": "index/deep-image-96-angular/faiss_ivf_sq/nlist16384-fp16", + "file": "index/deep-image-96-inner/faiss_ivf_sq/nlist16384-fp16", "search_params": [ { "nprobe": 1 @@ -503,7 +503,7 @@ "nprobe": 2000 } ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_sq/nlist16384-fp16" + "search_result_file": "result/deep-image-96-inner/faiss_ivf_sq/nlist16384-fp16" }, { "name": "faiss_ivf_sq.nlist1024-int8", @@ -512,7 +512,7 @@ "nlist": 1024, "quantizer_type": "int8" }, - "file": "index/deep-image-96-angular/faiss_ivf_sq/nlist1024-int8", + "file": "index/deep-image-96-inner/faiss_ivf_sq/nlist1024-int8", "search_params": [ { "nprobe": 1 @@ -539,7 +539,7 @@ "nprobe": 1000 } ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_sq/nlist1024-int8" + "search_result_file": "result/deep-image-96-inner/faiss_ivf_sq/nlist1024-int8" }, { "name": "faiss_ivf_sq.nlist2048-int8", @@ -548,7 +548,7 @@ "nlist": 2048, "quantizer_type": "int8" }, - "file": "index/deep-image-96-angular/faiss_ivf_sq/nlist2048-int8", + "file": "index/deep-image-96-inner/faiss_ivf_sq/nlist2048-int8", "search_params": [ { "nprobe": 1 @@ -575,7 +575,7 @@ "nprobe": 1000 } ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_sq/nlist2048-int8" + "search_result_file": "result/deep-image-96-inner/faiss_ivf_sq/nlist2048-int8" }, { "name": "faiss_ivf_sq.nlist4096-int8", @@ -584,7 +584,7 @@ "nlist": 4096, "quantizer_type": "int8" }, - "file": "index/deep-image-96-angular/faiss_ivf_sq/nlist4096-int8", + "file": "index/deep-image-96-inner/faiss_ivf_sq/nlist4096-int8", "search_params": [ { "nprobe": 1 @@ -611,7 +611,7 @@ "nprobe": 1000 } ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_sq/nlist4096-int8" + "search_result_file": 
"result/deep-image-96-inner/faiss_ivf_sq/nlist4096-int8" }, { "name": "faiss_ivf_sq.nlist8192-int8", @@ -620,7 +620,7 @@ "nlist": 8192, "quantizer_type": "int8" }, - "file": "index/deep-image-96-angular/faiss_ivf_sq/nlist8192-int8", + "file": "index/deep-image-96-inner/faiss_ivf_sq/nlist8192-int8", "search_params": [ { "nprobe": 1 @@ -647,7 +647,7 @@ "nprobe": 1000 } ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_sq/nlist8192-int8" + "search_result_file": "result/deep-image-96-inner/faiss_ivf_sq/nlist8192-int8" }, { "name": "faiss_ivf_sq.nlist16384-int8", @@ -656,7 +656,7 @@ "nlist": 16384, "quantizer_type": "int8" }, - "file": "index/deep-image-96-angular/faiss_ivf_sq/nlist16384-int8", + "file": "index/deep-image-96-inner/faiss_ivf_sq/nlist16384-int8", "search_params": [ { "nprobe": 1 @@ -686,17 +686,17 @@ "nprobe": 2000 } ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_sq/nlist16384-int8" + "search_result_file": "result/deep-image-96-inner/faiss_ivf_sq/nlist16384-int8" }, { "name": "faiss_flat", "algo": "faiss_gpu_flat", "build_param": {}, - "file": "index/deep-image-96-angular/faiss_flat/flat", + "file": "index/deep-image-96-inner/faiss_flat/flat", "search_params": [ {} ], - "search_result_file": "result/deep-image-96-angular/faiss_flat/flat" + "search_result_file": "result/deep-image-96-inner/faiss_flat/flat" }, { @@ -705,7 +705,7 @@ "build_param": {"nlist": 1024, "pq_dim": 128, "ratio": 1, "niter": 25 }, - "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024", + "file": "index/deep-image-96-inner/raft_ivf_pq/dimpq128-cluster1024", "search_params": [ {"nprobe": 10, "internalDistanceDtype": "half", "smemLutDtype": "half"}, {"nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half"}, @@ -714,7 +714,7 @@ {"nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half"}, {"nprobe": 1024, "internalDistanceDtype": "half", "smemLutDtype": "half"} ], - "search_result_file": 
"result/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024" + "search_result_file": "result/deep-image-96-inner/raft_ivf_pq/dimpq128-cluster1024" }, { "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", @@ -726,7 +726,7 @@ "ratio": 1, "niter": 25 }, - "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-float-float", + "file": "index/deep-image-96-inner/raft_ivf_pq/dimpq128-cluster1024-float-float", "search_params": [ {"nprobe": 1, "internalDistanceDtype": "float", "smemLutDtype": "float"}, {"nprobe": 5, "internalDistanceDtype": "float", "smemLutDtype": "float"}, @@ -737,7 +737,7 @@ {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float"}, {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "float"} ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-float-float" + "search_result_file": "result/deep-image-96-inner/raft_ivf_pq/dimpq128-cluster1024-float-float" }, { "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", @@ -749,7 +749,7 @@ "ratio": 1, "niter": 25 }, - "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-float-half", + "file": "index/deep-image-96-inner/raft_ivf_pq/dimpq128-cluster1024-float-half", "search_params": [ {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "half"}, {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "half"}, @@ -758,7 +758,7 @@ {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "half"}, {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "half"} ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-float-half" + "search_result_file": "result/deep-image-96-inner/raft_ivf_pq/dimpq128-cluster1024-float-half" }, { "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8", @@ -769,7 +769,7 @@ "ratio": 1, "niter": 25 }, - "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8", + "file": 
"index/deep-image-96-inner/raft_ivf_pq/dimpq128-cluster1024-float-fp8", "search_params": [ {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, @@ -778,7 +778,7 @@ {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8"} ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8" + "search_result_file": "result/deep-image-96-inner/raft_ivf_pq/dimpq128-cluster1024-float-fp8" }, { "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", @@ -789,7 +789,7 @@ "ratio": 1, "niter": 25 }, - "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8", + "file": "index/deep-image-96-inner/raft_ivf_pq/dimpq64-cluster1024-float-fp8", "search_params": [ {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, @@ -798,7 +798,7 @@ {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8"} ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8" + "search_result_file": "result/deep-image-96-inner/raft_ivf_pq/dimpq64-cluster1024-float-fp8" }, { "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", @@ -809,7 +809,7 @@ "ratio": 1, "niter": 25 }, - "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq64-cluster1024-float-half", + "file": "index/deep-image-96-inner/raft_ivf_pq/dimpq64-cluster1024-float-half", "search_params": [ {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "half"}, {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "half"}, @@ -818,7 +818,7 @@ {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "half"}, {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": 
"half"} ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq64-cluster1024-float-half" + "search_result_file": "result/deep-image-96-inner/raft_ivf_pq/dimpq64-cluster1024-float-half" }, { "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", @@ -829,7 +829,7 @@ "ratio": 1, "niter": 25 }, - "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8", + "file": "index/deep-image-96-inner/raft_ivf_pq/dimpq32-cluster1024-float-fp8", "search_params": [ {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, @@ -838,7 +838,7 @@ {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8"} ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8" + "search_result_file": "result/deep-image-96-inner/raft_ivf_pq/dimpq32-cluster1024-float-fp8" }, { "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", @@ -850,7 +850,7 @@ "ratio": 1, "niter": 25 }, - "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8", + "file": "index/deep-image-96-inner/raft_ivf_pq/dimpq16-cluster1024-float-fp8", "search_params": [ {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, @@ -859,7 +859,7 @@ {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8"} ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8" + "search_result_file": "result/deep-image-96-inner/raft_ivf_pq/dimpq16-cluster1024-float-fp8" }, { "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", @@ -870,7 +870,7 @@ "ratio": 1, "niter": 25 }, - "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-half-float", + 
"file": "index/deep-image-96-inner/raft_ivf_pq/dimpq128-cluster1024-half-float", "search_params": [ {"nprobe": 10, "internalDistanceDtype": "half", "smemLutDtype": "float"}, {"nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "float"}, @@ -879,7 +879,7 @@ {"nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "float"}, {"nprobe": 1024, "internalDistanceDtype": "half", "smemLutDtype": "float"} ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-half-float" + "search_result_file": "result/deep-image-96-inner/raft_ivf_pq/dimpq128-cluster1024-half-float" }, { "name": "raft_ivf_pq.dimpq512-cluster1024-float-float", @@ -890,7 +890,7 @@ "ratio": 1, "niter": 25 }, - "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq512-cluster1024-float-float", + "file": "index/deep-image-96-inner/raft_ivf_pq/dimpq512-cluster1024-float-float", "search_params": [ {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "float"}, {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float"}, @@ -899,7 +899,7 @@ {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float"}, {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "float"} ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq512-cluster1024-float-float" + "search_result_file": "result/deep-image-96-inner/raft_ivf_pq/dimpq512-cluster1024-float-float" }, { "name": "raft_ivf_flat.nlist1024", @@ -909,7 +909,7 @@ "ratio": 1, "niter": 25 }, - "file": "index/deep-image-96-angular/raft_ivf_flat/nlist1024", + "file": "index/deep-image-96-inner/raft_ivf_flat/nlist1024", "search_params": [ { "nprobe": 1 @@ -936,7 +936,7 @@ "nprobe": 1000 } ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_flat/nlist1024" + "search_result_file": "result/deep-image-96-inner/raft_ivf_flat/nlist1024" }, { "name": "raft_ivf_flat.nlist16384", @@ -946,7 +946,7 @@ "ratio": 2, "niter": 20 }, - "file": 
"index/deep-image-96-angular/raft_ivf_flat/nlist16384", + "file": "index/deep-image-96-inner/raft_ivf_flat/nlist16384", "search_params": [ { "nprobe": 1 @@ -976,7 +976,7 @@ "nprobe": 2000 } ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_flat/nlist16384" + "search_result_file": "result/deep-image-96-inner/raft_ivf_flat/nlist16384" }, { @@ -985,13 +985,13 @@ "build_param": { "graph_degree" : 32 }, - "file" : "index/deep-image-96-angular/raft_cagra/dim32", + "file" : "index/deep-image-96-inner/raft_cagra/dim32", "search_params" : [ {"itopk": 32}, {"itopk": 64}, {"itopk": 128} ], - "search_result_file" : "result/deep-image-96-angular/raft_cagra/dim32" + "search_result_file" : "result/deep-image-96-inner/raft_cagra/dim32" }, { @@ -1000,13 +1000,13 @@ "build_param": { "graph_degree" : 64 }, - "file" : "index/deep-image-96-angular/raft_cagra/dim64", + "file" : "index/deep-image-96-inner/raft_cagra/dim64", "search_params" : [ {"itopk": 32}, {"itopk": 64}, {"itopk": 128} ], - "search_result_file" : "result/deep-image-96-angular/raft_cagra/dim64" + "search_result_file" : "result/deep-image-96-inner/raft_cagra/dim64" } ] } diff --git a/bench/ann/conf/fashion-mnist-784-euclidean.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/fashion-mnist-784-euclidean.json similarity index 99% rename from bench/ann/conf/fashion-mnist-784-euclidean.json rename to python/raft-ann-bench/src/raft-ann-bench/run/conf/fashion-mnist-784-euclidean.json index d1b58be367..65f28fc81a 100644 --- a/bench/ann/conf/fashion-mnist-784-euclidean.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/fashion-mnist-784-euclidean.json @@ -1,8 +1,8 @@ { "dataset": { "name": "fashion-mnist-784-euclidean", - "base_file": "data/fashion-mnist-784-euclidean/base.fbin", - "query_file": "data/fashion-mnist-784-euclidean/query.fbin", + "base_file": "fashion-mnist-784-euclidean/base.fbin", + "query_file": "fashion-mnist-784-euclidean/query.fbin", "distance": "euclidean" }, 
"search_basic_param": { diff --git a/bench/ann/conf/gist-960-euclidean.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/gist-960-euclidean.json similarity index 99% rename from bench/ann/conf/gist-960-euclidean.json rename to python/raft-ann-bench/src/raft-ann-bench/run/conf/gist-960-euclidean.json index fed7750172..b097aa7ca0 100644 --- a/bench/ann/conf/gist-960-euclidean.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/gist-960-euclidean.json @@ -1,8 +1,8 @@ { "dataset": { "name": "gist-960-euclidean", - "base_file": "data/gist-960-euclidean/base.fbin", - "query_file": "data/gist-960-euclidean/query.fbin", + "base_file": "gist-960-euclidean/base.fbin", + "query_file": "gist-960-euclidean/query.fbin", "distance": "euclidean" }, "search_basic_param": { diff --git a/bench/ann/conf/glove-100-angular.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-angular.json similarity index 99% rename from bench/ann/conf/glove-100-angular.json rename to python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-angular.json index 8c2f8ee617..526aef2db0 100644 --- a/bench/ann/conf/glove-100-angular.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-angular.json @@ -1,8 +1,8 @@ { "dataset": { "name": "glove-100-angular", - "base_file": "data/glove-100-angular/base.fbin", - "query_file": "data/glove-100-angular/query.fbin", + "base_file": "glove-100-angular/base.fbin", + "query_file": "glove-100-angular/query.fbin", "distance": "euclidean" }, "search_basic_param": { diff --git a/bench/ann/conf/glove-100-inner.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-inner.json similarity index 100% rename from bench/ann/conf/glove-100-inner.json rename to python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-inner.json diff --git a/bench/ann/conf/glove-50-angular.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-50-angular.json similarity index 99% rename from bench/ann/conf/glove-50-angular.json 
rename to python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-50-angular.json index a73ed1ec07..9b3f192c9f 100644 --- a/bench/ann/conf/glove-50-angular.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-50-angular.json @@ -1,8 +1,8 @@ { "dataset": { "name": "glove-50-angular", - "base_file": "data/glove-50-angular/base.fbin", - "query_file": "data/glove-50-angular/query.fbin", + "base_file": "glove-50-angular/base.fbin", + "query_file": "glove-50-angular/query.fbin", "distance": "euclidean" }, "search_basic_param": { diff --git a/bench/ann/conf/lastfm-65-angular.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/lastfm-65-angular.json similarity index 99% rename from bench/ann/conf/lastfm-65-angular.json rename to python/raft-ann-bench/src/raft-ann-bench/run/conf/lastfm-65-angular.json index b07e682268..e5a4ca6e5f 100644 --- a/bench/ann/conf/lastfm-65-angular.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/lastfm-65-angular.json @@ -1,8 +1,8 @@ { "dataset": { "name": "lastfm-65-angular", - "base_file": "data/lastfm-65-angular/base.fbin", - "query_file": "data/lastfm-65-angular/query.fbin", + "base_file": "lastfm-65-angular/base.fbin", + "query_file": "lastfm-65-angular/query.fbin", "distance": "euclidean" }, "search_basic_param": { diff --git a/bench/ann/conf/mnist-784-euclidean.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/mnist-784-euclidean.json similarity index 99% rename from bench/ann/conf/mnist-784-euclidean.json rename to python/raft-ann-bench/src/raft-ann-bench/run/conf/mnist-784-euclidean.json index 362cc21083..2a493edeed 100644 --- a/bench/ann/conf/mnist-784-euclidean.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/mnist-784-euclidean.json @@ -1,8 +1,8 @@ { "dataset": { "name": "mnist-784-euclidean", - "base_file": "data/mnist-784-euclidean/base.fbin", - "query_file": "data/mnist-784-euclidean/query.fbin", + "base_file": "mnist-784-euclidean/base.fbin", + "query_file": 
"mnist-784-euclidean/query.fbin", "distance": "euclidean" }, "search_basic_param": { diff --git a/bench/ann/conf/nytimes-256-angular.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/nytimes-256-angular.json similarity index 99% rename from bench/ann/conf/nytimes-256-angular.json rename to python/raft-ann-bench/src/raft-ann-bench/run/conf/nytimes-256-angular.json index 4c389bb6b7..630b700ba5 100644 --- a/bench/ann/conf/nytimes-256-angular.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/nytimes-256-angular.json @@ -1,8 +1,8 @@ { "dataset": { "name": "nytimes-256-angular", - "base_file": "data/nytimes-256-angular/base.fbin", - "query_file": "data/nytimes-256-angular/query.fbin", + "base_file": "nytimes-256-angular/base.fbin", + "query_file": "nytimes-256-angular/query.fbin", "distance": "euclidean" }, "search_basic_param": { diff --git a/bench/ann/conf/sift-128-euclidean.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/sift-128-euclidean.json similarity index 100% rename from bench/ann/conf/sift-128-euclidean.json rename to python/raft-ann-bench/src/raft-ann-bench/run/conf/sift-128-euclidean.json
diff --git a/bench/ann/split_groundtruth.py b/python/raft-ann-bench/src/raft-ann-bench/split_groundtruth/__main__.py
similarity index 63%
rename from bench/ann/split_groundtruth.py
rename to python/raft-ann-bench/src/raft-ann-bench/split_groundtruth/__main__.py
index cd67d9c8b8..161617f85c 100644
--- a/bench/ann/split_groundtruth.py
+++ b/python/raft-ann-bench/src/raft-ann-bench/split_groundtruth/__main__.py
@@ -19,25 +19,38 @@
 
 
 def split_groundtruth(groundtruth_filepath):
-    ann_bench_scripts_dir = os.path.join(os.getenv("RAFT_HOME"),
-                                         "cpp/bench/ann/scripts")
-    ann_bench_scripts_path = os.path.join(ann_bench_scripts_dir,
-                                          "split_groundtruth.pl")
-    pwd = os.getcwd()
-    os.chdir("/".join(groundtruth_filepath.split("/")[:-1]))
-    groundtruth_filename = groundtruth_filepath.split("/")[-1]
-    p = subprocess.Popen([ann_bench_scripts_path, groundtruth_filename,
-                          "groundtruth"])
+    """Split a groundtruth file by running ``split_groundtruth.pl`` on it.
+
+    The Perl script is started in the directory that contains
+    ``groundtruth_filepath`` and is passed the file name plus the literal
+    ``"groundtruth"``; this function blocks until the script exits.
+    """
+    # The script sits next to this module, so locate it through __file__:
+    # a bare "split_groundtruth.pl" is only searched for on PATH.
+    ann_bench_scripts_path = os.path.join(
+        os.path.dirname(os.path.realpath(__file__)), "split_groundtruth.pl"
+    )
+    groundtruth_dir, groundtruth_filename = os.path.split(groundtruth_filepath)
+    p = subprocess.Popen(
+        [ann_bench_scripts_path, groundtruth_filename, "groundtruth"],
+        # Start the child in the data directory rather than os.chdir()-ing
+        # this process; a bare file name has no directory part, so fall
+        # back to None (inherit the current working directory).
+        cwd=groundtruth_dir or None,
+    )
     p.wait()
-    os.chdir(pwd)
 
 
 def main():
+    """Parse ``--groundtruth`` from the command line and split that file."""
     parser = argparse.ArgumentParser(
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
-    parser.add_argument("--groundtruth",
-                        help="Path to billion-scale dataset groundtruth file",
-                        required=True)
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter
+    )
+    parser.add_argument(
+        "--groundtruth",
+        help="Path to billion-scale dataset groundtruth file",
+        required=True,
+    )
     args = parser.parse_args()
 
     split_groundtruth(args.groundtruth)
diff --git a/cpp/bench/ann/scripts/split_groundtruth.pl b/python/raft-ann-bench/src/raft-ann-bench/split_groundtruth/split_groundtruth.pl similarity index 100% rename from cpp/bench/ann/scripts/split_groundtruth.pl rename to python/raft-ann-bench/src/raft-ann-bench/split_groundtruth/split_groundtruth.pl