From bd738ece0cd965707cd417a1a6eec7e6a3ce4895 Mon Sep 17 00:00:00 2001 From: achirkin Date: Wed, 9 Aug 2023 13:19:31 +0200 Subject: [PATCH 01/70] ANN-benchmarks: switch to use gbench --- cpp/CMakeLists.txt | 2 +- cpp/bench/ann/CMakeLists.txt | 55 +- cpp/bench/ann/conf/bigann-100M.json | 244 ++- cpp/bench/ann/conf/deep-100M.json | 862 ++-------- cpp/bench/ann/conf/deep-1B.json | 36 +- cpp/bench/ann/conf/glove-100-inner.json | 512 +++--- cpp/bench/ann/conf/sift-128-euclidean.json | 1473 ++++------------- cpp/bench/ann/scripts/eval.pl | 430 ----- cpp/bench/ann/src/common/ann_types.hpp | 54 +- cpp/bench/ann/src/common/benchmark.cpp | 107 ++ cpp/bench/ann/src/common/benchmark.hpp | 912 +++++----- cpp/bench/ann/src/common/benchmark_util.hpp | 33 - cpp/bench/ann/src/common/conf.cpp | 151 -- cpp/bench/ann/src/common/conf.h | 75 - cpp/bench/ann/src/common/conf.hpp | 156 ++ cpp/bench/ann/src/common/cuda_stub.hpp | 148 ++ .../ann/src/common/{dataset.h => dataset.hpp} | 89 +- cpp/bench/ann/src/common/util.cpp | 68 - cpp/bench/ann/src/common/util.h | 79 - cpp/bench/ann/src/common/util.hpp | 318 ++++ cpp/bench/ann/src/faiss/faiss_benchmark.cu | 8 +- cpp/bench/ann/src/faiss/faiss_wrapper.h | 13 +- cpp/bench/ann/src/ggnn/ggnn_benchmark.cu | 8 +- cpp/bench/ann/src/ggnn/ggnn_wrapper.cuh | 14 +- .../ann/src/hnswlib/hnswlib_benchmark.cpp | 14 +- cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h | 5 +- cpp/bench/ann/src/raft/raft_benchmark.cu | 19 +- cpp/bench/ann/src/raft/raft_cagra_wrapper.h | 36 +- .../ann/src/raft/raft_ivf_flat_wrapper.h | 37 +- cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h | 38 +- docs/source/ann_benchmarks_low_level.md | 264 +-- 31 files changed, 2370 insertions(+), 3890 deletions(-) delete mode 100755 cpp/bench/ann/scripts/eval.pl create mode 100644 cpp/bench/ann/src/common/benchmark.cpp delete mode 100644 cpp/bench/ann/src/common/benchmark_util.hpp delete mode 100644 cpp/bench/ann/src/common/conf.cpp delete mode 100644 cpp/bench/ann/src/common/conf.h create mode 100644 cpp/bench/ann/src/common/conf.hpp create mode 100644 cpp/bench/ann/src/common/cuda_stub.hpp rename cpp/bench/ann/src/common/{dataset.h => dataset.hpp} (85%) delete mode 100644 cpp/bench/ann/src/common/util.cpp delete mode 100644 cpp/bench/ann/src/common/util.h create mode 100644 cpp/bench/ann/src/common/util.hpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 610c5b97f6..d545a0a8cc 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -162,7 +162,7 @@ if(BUILD_TESTS) include(cmake/thirdparty/get_gtest.cmake) endif() -if(BUILD_PRIMS_BENCH) +if(BUILD_PRIMS_BENCH OR BUILD_ANN_BENCH) include(${rapids-cmake-dir}/cpm/gbench.cmake) rapids_cpm_gbench() endif() diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index 6977d77684..5e31e71b06 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -77,10 +77,11 @@ function(ConfigureAnnBench) set(BENCH_NAME ${ConfigureAnnBench_NAME}_ANN_BENCH) - add_executable( - ${BENCH_NAME} ${ConfigureAnnBench_PATH} bench/ann/src/common/conf.cpp - bench/ann/src/common/util.cpp - ) + add_library(${BENCH_NAME} SHARED ${ConfigureAnnBench_PATH}) + + string(TOLOWER ${BENCH_NAME} BENCH_LIB_NAME) + set_target_properties(${BENCH_NAME} PROPERTIES OUTPUT_NAME ${BENCH_LIB_NAME}) + target_link_libraries( ${BENCH_NAME} PRIVATE raft::raft @@ -91,18 +92,21 @@ function(ConfigureAnnBench) ${RAFT_CTK_MATH_DEPENDENCIES} $ $ + -static-libgcc + -static-libstdc++ ) set_target_properties( ${BENCH_NAME} PROPERTIES # set target compile options - INSTALL_RPATH 
"\$ORIGIN/../../../lib" CXX_STANDARD 17 CXX_STANDARD_REQUIRED ON CUDA_STANDARD 17 CUDA_STANDARD_REQUIRED ON POSITION_INDEPENDENT_CODE ON INTERFACE_POSITION_INDEPENDENT_CODE ON + BUILD_RPATH "\$ORIGIN" + INSTALL_RPATH "\$ORIGIN" ) set(${ConfigureAnnBench_CXXFLAGS} ${RAFT_CXX_FLAGS} ${ConfigureAnnBench_CXXFLAGS}) @@ -200,3 +204,44 @@ if(RAFT_ANN_BENCH_USE_GGNN) ${CMAKE_CURRENT_BINARY_DIR}/_deps/ggnn-src/include LINKS glog::glog ) endif() + +# ################################################################################################## +# * Dynamically-loading ANN_BENCH executable ------------------------------------------------------- + +add_executable(ANN_BENCH bench/ann/src/common/benchmark.cpp) + +# Build and link static version of the GBench to keep ANN_BENCH self-contained. +get_target_property(TMP_PROP benchmark::benchmark SOURCES) +add_library(benchmark_static STATIC ${TMP_PROP}) +get_target_property(TMP_PROP benchmark::benchmark INCLUDE_DIRECTORIES) +target_include_directories(benchmark_static PUBLIC ${TMP_PROP}) +get_target_property(TMP_PROP benchmark::benchmark LINK_LIBRARIES) +target_link_libraries(benchmark_static PUBLIC ${TMP_PROP}) + +target_include_directories(ANN_BENCH PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) + +target_link_libraries( + ANN_BENCH PRIVATE nlohmann_json::nlohmann_json benchmark_static dl -static-libgcc + -static-libstdc++ CUDA::nvtx3 +) +set_target_properties( + ANN_BENCH + PROPERTIES # set target compile options + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON + POSITION_INDEPENDENT_CODE ON + INTERFACE_POSITION_INDEPENDENT_CODE ON + BUILD_RPATH "\$ORIGIN" + INSTALL_RPATH "\$ORIGIN" +) + +target_link_options(ANN_BENCH PRIVATE -export-dynamic) + +install( + TARGETS ANN_BENCH + COMPONENT ann_bench + DESTINATION bin/ann + EXCLUDE_FROM_ALL +) diff --git a/cpp/bench/ann/conf/bigann-100M.json b/cpp/bench/ann/conf/bigann-100M.json index 0e59936f0e..54606f600e 100644 --- a/cpp/bench/ann/conf/bigann-100M.json +++ b/cpp/bench/ann/conf/bigann-100M.json @@ -1,79 +1,89 @@ { - "dataset" : { - "name" : "bigann-100M", - "base_file" : "data/bigann-1B/base.1B.u8bin", - "subset_size" : 100000000, - "query_file" : "data/bigann-1B/query.public.10K.u8bin", - "distance" : "euclidean" + "dataset": { + "name": "bigann-100M", + "base_file": "bigann-1B/base.1B.u8bin", + "subset_size": 100000000, + "query_file": "bigann-1B/query.public.10K.u8bin", + "groundtruth_neighbors_file": "bigann-100M/groundtruth.neighbors.ibin", + "distance": "euclidean" }, - "search_basic_param" : { - "batch_size" : 10000, - "k" : 10, - "run_count" : 2 + "search_basic_param": { + "batch_size": 10000, + "k": 10 }, - "index" : [ + "index": [ { - "name": "raft_ivf_pq.dimpq64-cluster5K-float-float", + "name": "raft_ivf_pq.dimpq64-cluster5K", "algo": "raft_ivf_pq", - "build_param": { - "niter": 25, - "nlist": 5000, - "pq_dim": 64, - "ratio": 10 - }, - "file": "index/bigann-100M/raft_ivf_pq/dimpq64-cluster5K", + "build_param": {"niter": 25, "nlist": 5000, "pq_dim": 64, "ratio": 10}, + "file": "bigann-100M/raft_ivf_pq/dimpq64-cluster5K", "search_params": [ - { - "numProbes": 20, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "numProbes": 30, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "numProbes": 40, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "numProbes": 100, - "internalDistanceDtype": 
"float", - "smemLutDtype": "float" - }, - { - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "numProbes": 1000, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/bigann-100M/raft_ivf_pq/dimpq64-cluster5K-float-float" + { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 1000, "internalDistanceDtype": "half", "smemLutDtype": "half" } + ] }, { - "name" : "hnswlib.M12", - "algo" : "hnswlib", + "name": "raft_ivf_pq.dimpq64-cluster10K", + "algo": "raft_ivf_pq", + "build_param": {"niter": 25, "nlist": 10000, "pq_dim": 64, "ratio": 10}, + "file": "bigann-100M/raft_ivf_pq/dimpq64-cluster5K", + "search_params": [ + { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 100, "internalDistanceDtype": 
"float", "smemLutDtype": "fp8" }, + { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 1000, "internalDistanceDtype": "half", "smemLutDtype": "half" } + ] + }, + { + "name": "hnswlib.M12", + "algo": "hnswlib", "build_param": {"M":12, "efConstruction":500, "numThreads":32}, - "file" : "index/bigann-100M/hnswlib/M12", - "search_params" : [ + "file": "bigann-100M/hnswlib/M12", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -84,15 +94,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/bigann-100M/hnswlib/M12" + ] }, { - "name" : "hnswlib.M16", - "algo" : "hnswlib", + "name": "hnswlib.M16", + "algo": "hnswlib", "build_param": {"M":16, "efConstruction":500, "numThreads":32}, - "file" : "index/bigann-100M/hnswlib/M16", - "search_params" : [ + "file": "bigann-100M/hnswlib/M16", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -103,15 +112,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/bigann-100M/hnswlib/M16" + ] }, { - "name" : "hnswlib.M24", - "algo" : "hnswlib", + "name": "hnswlib.M24", + "algo": "hnswlib", "build_param": {"M":24, "efConstruction":500, "numThreads":32}, - "file" : "index/bigann-100M/hnswlib/M24", - "search_params" : [ + "file": "bigann-100M/hnswlib/M24", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -122,15 +130,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/bigann-100M/hnswlib/M24" + ] }, { - "name" : "hnswlib.M36", - "algo" : "hnswlib", + "name": "hnswlib.M36", + "algo": "hnswlib", "build_param": {"M":36, "efConstruction":500, "numThreads":32}, - "file" : "index/bigann-100M/hnswlib/M36", - "search_params" : [ + "file": "bigann-100M/hnswlib/M36", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -141,62 +148,45 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/bigann-100M/hnswlib/M36" + ] }, - - { - "name" : "raft_ivf_flat.nlist100K", - "algo" : "raft_ivf_flat", - "build_param": { - "nlist" : 100000, - "niter" : 25, - "ratio" : 5 - }, - "file" : "index/bigann-100M/raft_ivf_flat/nlist100K", - "search_params" : [ - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ], - "search_result_file" : "result/bigann-100M/raft_ivf_flat/nlist100K" + "name": "raft_ivf_flat.nlist100K", + "algo": "raft_ivf_flat", + "build_param": {"nlist": 100000, 
"niter": 25, "ratio": 5}, + "file": "bigann-100M/raft_ivf_flat/nlist100K", + "search_params": [ + {"max_batch":10000, "max_k":10, "nprobe":20}, + {"max_batch":10000, "max_k":10, "nprobe":30}, + {"max_batch":10000, "max_k":10, "nprobe":40}, + {"max_batch":10000, "max_k":10, "nprobe":50}, + {"max_batch":10000, "max_k":10, "nprobe":100}, + {"max_batch":10000, "max_k":10, "nprobe":200}, + {"max_batch":10000, "max_k":10, "nprobe":500}, + {"max_batch":10000, "max_k":10, "nprobe":1000} + ] }, - { - "name" : "raft_cagra.dim32", - "algo" : "raft_cagra", - "build_param": { - "index_dim" : 32 - }, - "file" : "index/bigann-100M/raft_cagra/dim32", - "search_params" : [ + "name": "raft_cagra.dim32", + "algo": "raft_cagra", + "build_param": {"index_dim": 32}, + "file": "bigann-100M/raft_cagra/dim32", + "search_params": [ {"itopk": 32}, {"itopk": 64}, {"itopk": 128} - ], - "search_result_file" : "result/bigann-100M/raft_cagra/dim32" + ] }, - - { - "name" : "raft_cagra.dim64", - "algo" : "raft_cagra", - "build_param": { - "index_dim" : 64 - }, - "file" : "index/bigann-100M/raft_cagra/dim64", - "search_params" : [ + "name": "raft_cagra.dim64", + "algo": "raft_cagra", + "build_param": {"index_dim": 64}, + "file": "bigann-100M/raft_cagra/dim64", + "search_params": [ {"itopk": 32}, {"itopk": 64}, {"itopk": 128} - ], - "search_result_file" : "result/bigann-100M/raft_cagra/dim64" + ] } ] } diff --git a/cpp/bench/ann/conf/deep-100M.json b/cpp/bench/ann/conf/deep-100M.json index 2f2ec92489..46e1879f52 100644 --- a/cpp/bench/ann/conf/deep-100M.json +++ b/cpp/bench/ann/conf/deep-100M.json @@ -1,25 +1,25 @@ { - "dataset" : { - "name" : "deep-100M", - "base_file" : "data/deep-1B/base.1B.fbin", - "subset_size" : 100000000, - "query_file" : "data/deep-1B/query.public.10K.fbin", - "distance" : "euclidean" + "dataset": { + "name": "deep-100M", + "base_file": "data/deep-1B/base.1B.fbin", + "subset_size": 100000000, + "query_file": "data/deep-1B/query.public.10K.fbin", + "groundtruth_neighbors_file": "deep-100M/groundtruth.neighbors.ibin", + "distance": "euclidean" }, - "search_basic_param" : { - "batch_size" : 10000, - "k" : 10, - "run_count" : 2 + "search_basic_param": { + "batch_size": 10000, + "k": 10 }, - "index" : [ + "index": [ { - "name" : "hnswlib.M12", - "algo" : "hnswlib", + "name": "hnswlib.M12", + "algo": "hnswlib", "build_param": {"M":12, "efConstruction":500, "numThreads":32}, - "file" : "index/deep-100M/hnswlib/M12", - "search_params" : [ + "file": "deep-100M/hnswlib/M12", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -30,15 +30,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/deep-100M/hnswlib/M12" + ] }, { - "name" : "hnswlib.M16", - "algo" : "hnswlib", + "name": "hnswlib.M16", + "algo": "hnswlib", "build_param": {"M":16, "efConstruction":500, "numThreads":32}, - "file" : "index/deep-100M/hnswlib/M16", - "search_params" : [ + "file": "deep-100M/hnswlib/M16", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -49,15 +48,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/deep-100M/hnswlib/M16" + ] }, { - "name" : "hnswlib.M24", - "algo" : "hnswlib", + "name": "hnswlib.M24", + "algo": "hnswlib", "build_param": {"M":24, "efConstruction":500, "numThreads":32}, - "file" : "index/deep-100M/hnswlib/M24", - "search_params" : [ + "file": 
"deep-100M/hnswlib/M24", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -68,15 +66,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/deep-100M/hnswlib/M24" + ] }, { - "name" : "hnswlib.M36", - "algo" : "hnswlib", + "name": "hnswlib.M36", + "algo": "hnswlib", "build_param": {"M":36, "efConstruction":500, "numThreads":32}, - "file" : "index/deep-100M/hnswlib/M36", - "search_params" : [ + "file": "deep-100M/hnswlib/M36", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -87,15 +84,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/deep-100M/hnswlib/M36" + ] }, { - "name" : "faiss_ivf_flat.nlist50K", - "algo" : "faiss_gpu_ivf_flat", + "name": "faiss_ivf_flat.nlist50K", + "algo": "faiss_gpu_ivf_flat", "build_param": {"nlist":50000}, - "file" : "index/deep-100M/faiss_ivf_flat/nlist50K", - "search_params" : [ + "file": "deep-100M/faiss_ivf_flat/nlist50K", + "search_params": [ {"nprobe":20}, {"nprobe":30}, {"nprobe":40}, @@ -104,15 +100,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/deep-100M/faiss_ivf_flat/nlist50K" + ] }, { - "name" : "faiss_ivf_flat.nlist100K", - "algo" : "faiss_gpu_ivf_flat", + "name": "faiss_ivf_flat.nlist100K", + "algo": "faiss_gpu_ivf_flat", "build_param": {"nlist":100000}, - "file" : "index/deep-100M/faiss_ivf_flat/nlist100K", - "search_params" : [ + "file": "deep-100M/faiss_ivf_flat/nlist100K", + "search_params": [ {"nprobe":20}, {"nprobe":30}, {"nprobe":40}, @@ -121,15 +116,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/deep-100M/faiss_ivf_flat/nlist100K" + ] }, { - "name" : "faiss_ivf_flat.nlist200K", - "algo" : "faiss_gpu_ivf_flat", + "name": "faiss_ivf_flat.nlist200K", + "algo": "faiss_gpu_ivf_flat", "build_param": {"nlist":200000}, - "file" : "index/deep-100M/faiss_ivf_flat/nlist200K", - "search_params" : [ + "file": "deep-100M/faiss_ivf_flat/nlist200K", + "search_params": [ {"nprobe":20}, {"nprobe":30}, {"nprobe":40}, @@ -138,17 +132,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/deep-100M/faiss_ivf_flat/nlist200K" + ] }, - - { - "name" : "faiss_ivf_pq.M48-nlist16K", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M48-nlist16K", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":16384, "M":48}, - "file" : "index/deep-100M/faiss_ivf_pq/M48-nlist16K", - "search_params" : [ + "file": "deep-100M/faiss_ivf_pq/M48-nlist16K", + "search_params": [ {"nprobe":10}, {"nprobe":20}, {"nprobe":30}, @@ -157,15 +148,14 @@ {"nprobe":100}, {"nprobe":200}, {"nprobe":500} - ], - "search_result_file" : "result/deep-100M/faiss_ivf_pq/M48-nlist16K" + ] }, { - "name" : "faiss_ivf_pq.M48-nlist50K", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M48-nlist50K", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":50000, "M":48}, - "file" : "index/deep-100M/faiss_ivf_pq/M48-nlist50K", - "search_params" : [ + "file": "deep-100M/faiss_ivf_pq/M48-nlist50K", + "search_params": [ {"nprobe":20}, {"nprobe":30}, {"nprobe":40}, @@ -174,15 +164,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/deep-100M/faiss_ivf_pq/M48-nlist50K" + ] }, { - "name" : "faiss_ivf_pq.M48-nlist100K", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M48-nlist100K", + 
"algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":100000, "M":48}, - "file" : "index/deep-100M/faiss_ivf_pq/M48-nlist100K", - "search_params" : [ + "file": "deep-100M/faiss_ivf_pq/M48-nlist100K", + "search_params": [ {"nprobe":20}, {"nprobe":30}, {"nprobe":40}, @@ -191,659 +180,102 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/deep-100M/faiss_ivf_pq/M48-nlist100K" + ] }, - - { - "name" : "raft_ivf_flat.nlist10K", - "algo" : "raft_ivf_flat", - "build_param": { - "nlist" : 10000, - "niter" : 25, - "ratio" : 5 - }, - "file" : "index/deep-100M/raft_ivf_flat/nlist10K", - "search_params" : [ - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ], - "search_result_file" : "result/deep-100M/raft_ivf_flat/nlist10K" + "name": "raft_ivf_flat.nlist100K", + "algo": "raft_ivf_flat", + "build_param": {"nlist": 100000, "niter": 25, "ratio": 5}, + "file": "deep-100M/raft_ivf_flat/nlist100K", + "search_params": [ + {"max_batch":10000, "max_k":10, "nprobe":20}, + {"max_batch":10000, "max_k":10, "nprobe":30}, + {"max_batch":10000, "max_k":10, "nprobe":40}, + {"max_batch":10000, "max_k":10, "nprobe":50}, + {"max_batch":10000, "max_k":10, "nprobe":100}, + {"max_batch":10000, "max_k":10, "nprobe":200}, + {"max_batch":10000, "max_k":10, "nprobe":500}, + {"max_batch":10000, "max_k":10, "nprobe":1000} + ] }, { - "name" : "raft_ivf_flat.nlist100K", - "algo" : "raft_ivf_flat", - "build_param": { - "nlist" : 100000, - "niter" : 25, - "ratio" : 5 - }, - "file" : "index/deep-100M/raft_ivf_flat/nlist100K", - "search_params" : [ - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ], - "search_result_file" : "result/deep-100M/raft_ivf_flat/nlist100K" + "name": "raft_cagra.dim32", + "algo": "raft_cagra", + "build_param": {"index_dim": 32, "intermediate_graph_degree": 48}, + "file": "deep-100M/raft_cagra/dim32", + "search_params": [ + {"itopk": 32, "search_width": 1, "max_iterations": 0, "algo": "single_cta"}, + {"itopk": 32, "search_width": 1, "max_iterations": 32, "algo": "single_cta"}, + {"itopk": 64, "search_width": 4, "max_iterations": 16, "algo": "single_cta"}, + {"itopk": 64, "search_width": 1, "max_iterations": 64, "algo": "single_cta"}, + {"itopk": 96, "search_width": 2, "max_iterations": 48, "algo": "single_cta"}, + {"itopk": 128, "search_width": 8, "max_iterations": 16, "algo": "single_cta"}, + {"itopk": 128, "search_width": 2, "max_iterations": 64, "algo": "single_cta"}, + {"itopk": 192, "search_width": 8, "max_iterations": 24, "algo": "single_cta"}, + {"itopk": 192, "search_width": 2, "max_iterations": 96, "algo": "single_cta"}, + {"itopk": 256, "search_width": 8, "max_iterations": 32, "algo": "single_cta"}, + {"itopk": 384, "search_width": 8, "max_iterations": 48, "algo": "single_cta"}, + {"itopk": 512, "search_width": 8, "max_iterations": 64, "algo": "single_cta"} + ] }, - { - "name" : "raft_ivf_pq.nlist10K", - "algo" : "raft_ivf_pq", - "build_param": { - "nlist" : 10000, - "niter" : 25, - "ratio" : 5 - }, - "file" : "index/deep-100M/raft_ivf_pq/nlist10K", - "search_params" : [ - {"nprobe":3}, - {"nprobe":10}, - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ], - "search_result_file" : "result/deep-100M/raft_ivf_pq/nlist10K" - }, + 
"name": "raft_cagra.dim32.multi_cta", + "algo": "raft_cagra", + "build_param": {"index_dim": 32, "intermediate_graph_degree": 48}, + "file": "deep-100M/raft_cagra/dim32", + "search_params": [ + {"itopk": 32, "search_width": 1, "max_iterations": 0, "algo": "multi_cta"}, + {"itopk": 32, "search_width": 1, "max_iterations": 32, "algo": "multi_cta"}, + {"itopk": 64, "search_width": 4, "max_iterations": 16, "algo": "multi_cta"}, + {"itopk": 64, "search_width": 1, "max_iterations": 64, "algo": "multi_cta"}, + {"itopk": 96, "search_width": 2, "max_iterations": 48, "algo": "multi_cta"}, + {"itopk": 128, "search_width": 8, "max_iterations": 16, "algo": "multi_cta"}, + {"itopk": 128, "search_width": 2, "max_iterations": 64, "algo": "multi_cta"}, + {"itopk": 192, "search_width": 8, "max_iterations": 24, "algo": "multi_cta"}, + {"itopk": 192, "search_width": 2, "max_iterations": 96, "algo": "multi_cta"}, + {"itopk": 256, "search_width": 8, "max_iterations": 32, "algo": "multi_cta"}, + {"itopk": 384, "search_width": 8, "max_iterations": 48, "algo": "multi_cta"}, + {"itopk": 512, "search_width": 8, "max_iterations": 64, "algo": "multi_cta"} + ] { - "name" : "raft_ivf_pq.nlist10Kdim64", - "algo" : "raft_ivf_pq", - "build_param": { - "nlist" : 10000, - "niter" : 25, - "ratio" : 5, - "pq_dim": 64 - }, - "file" : "index/deep-100M/raft_ivf_pq/nlist10Kdim64", - "search_params" : [ - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ], - "search_result_file" : "result/deep-100M/raft_ivf_pq/nlist10Kdim64" + "name": "raft_cagra.dim32.multi_kernel", + "algo": "raft_cagra", + "build_param": {"index_dim": 32, "intermediate_graph_degree": 48}, + "file": "deep-100M/raft_cagra/dim32", + "search_params": [ + {"itopk": 32, "search_width": 1, "max_iterations": 0, "algo": "multi_kernel"}, + {"itopk": 32, "search_width": 1, "max_iterations": 32, "algo": "multi_kernel"}, + {"itopk": 64, "search_width": 4, "max_iterations": 16, "algo": "multi_kernel"}, + {"itopk": 64, "search_width": 1, "max_iterations": 64, "algo": "multi_kernel"}, + {"itopk": 96, "search_width": 2, "max_iterations": 48, "algo": "multi_kernel"}, + {"itopk": 128, "search_width": 8, "max_iterations": 16, "algo": "multi_kernel"}, + {"itopk": 128, "search_width": 2, "max_iterations": 64, "algo": "multi_kernel"}, + {"itopk": 192, "search_width": 8, "max_iterations": 24, "algo": "multi_kernel"}, + {"itopk": 192, "search_width": 2, "max_iterations": 96, "algo": "multi_kernel"}, + {"itopk": 256, "search_width": 8, "max_iterations": 32, "algo": "multi_kernel"}, + {"itopk": 384, "search_width": 8, "max_iterations": 48, "algo": "multi_kernel"}, + {"itopk": 512, "search_width": 8, "max_iterations": 64, "algo": "multi_kernel"} + ] }, { - "name" : "raft_ivf_pq.nlist10Kdim32", - "algo" : "raft_ivf_pq", - "build_param": { - "nlist" : 10000, - "niter" : 25, - "ratio" : 5, - "pq_dim": 32 - }, - "file" : "index/deep-100M/raft_ivf_pq/nlist10Kdim32", - "search_params" : [ - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ], - "search_result_file" : "result/deep-100M/raft_ivf_pq/nlist10Kdim32" - }, - { - "name" : "raft_ivf_pq.nlist100K", - "algo" : "raft_ivf_pq", - "build_param": { - "nlist" : 100000, - "niter" : 25, - "ratio" : 5 - }, - "file" : "index/deep-100M/raft_ivf_pq/nlist100K", - "search_params" : [ - {"nprobe":5}, - 
{"nprobe":10}, - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ], - "search_result_file" : "result/deep-100M/raft_ivf_pq/nlist100K" - }, - { - "name" : "raft_ivf_pq.nlist100Kdim64", - "algo" : "raft_ivf_pq", - "build_param": { - "nlist" : 100000, - "niter" : 25, - "ratio" : 5, - "pq_dim": 64 - }, - "file" : "index/deep-100M/raft_ivf_pq/nlist100Kdim64", - "search_params" : [ - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ], - "search_result_file" : "result/deep-100M/raft_ivf_pq/nlist100Kdim64" - }, - { - "name" : "raft_ivf_pq.nlist100Kdim32", - "algo" : "raft_ivf_pq", - "build_param": { - "nlist" : 100000, - "niter" : 25, - "ratio" : 5, - "pq_dim": 32 - }, - "file" : "index/deep-100M/raft_ivf_pq/nlist100Kdim32", - "search_params" : [ - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ], - "search_result_file" : "result/deep-100M/raft_ivf_pq/nlist100Kdim32" - }, - - { - "name" : "raft_cagra.dim32", - "algo" : "raft_cagra", - "build_param": { - "index_dim": 32, - "intermediate_graph_degree": 48 - }, - "file": "index/deep-100M/raft_cagra/dim32", - "search_params" : [ - { - "itopk": 32, - "search_width": 1, - "max_iterations": 0, - "algo": "single_cta" - }, - { - "itopk": 32, - "search_width": 1, - "max_iterations": 32, - "algo": "single_cta" - }, - { - "itopk": 64, - "search_width": 4, - "max_iterations": 16, - "algo": "single_cta" - }, - { - "itopk": 64, - "search_width": 1, - "max_iterations": 64, - "algo": "single_cta" - }, - { - "itopk": 96, - "search_width": 2, - "max_iterations": 48, - "algo": "single_cta" - }, - { - "itopk": 128, - "search_width": 8, - "max_iterations": 16, - "algo": "single_cta" - }, - { - "itopk": 128, - "search_width": 2, - "max_iterations": 64, - "algo": "single_cta" - }, - { - "itopk": 192, - "search_width": 8, - "max_iterations": 24, - "algo": "single_cta" - }, - { - "itopk": 192, - "search_width": 2, - "max_iterations": 96, - "algo": "single_cta" - }, - { - "itopk": 256, - "search_width": 8, - "max_iterations": 32, - "algo": "single_cta" - }, - { - "itopk": 384, - "search_width": 8, - "max_iterations": 48, - "algo": "single_cta" - }, - { - "itopk": 512, - "search_width": 8, - "max_iterations": 64, - "algo": "single_cta" - }, - - { - "itopk": 32, - "search_width": 1, - "max_iterations": 0, - "algo": "multi_cta" - }, - { - "itopk": 32, - "search_width": 1, - "max_iterations": 32, - "algo": "multi_cta" - }, - { - "itopk": 64, - "search_width": 4, - "max_iterations": 16, - "algo": "multi_cta" - }, - { - "itopk": 64, - "search_width": 1, - "max_iterations": 64, - "algo": "multi_cta" - }, - { - "itopk": 96, - "search_width": 2, - "max_iterations": 48, - "algo": "multi_cta" - }, - { - "itopk": 128, - "search_width": 8, - "max_iterations": 16, - "algo": "multi_cta" - }, - { - "itopk": 128, - "search_width": 2, - "max_iterations": 64, - "algo": "multi_cta" - }, - { - "itopk": 192, - "search_width": 8, - "max_iterations": 24, - "algo": "multi_cta" - }, - { - "itopk": 192, - "search_width": 2, - "max_iterations": 96, - "algo": "multi_cta" - }, - { - "itopk": 256, - "search_width": 8, - "max_iterations": 32, - "algo": "multi_cta" - }, - { - "itopk": 384, - "search_width": 8, - "max_iterations": 48, - "algo": "multi_cta" - }, - { - 
"itopk": 512, - "search_width": 8, - "max_iterations": 64, - "algo": "multi_cta" - }, - - { - "itopk": 32, - "search_width": 1, - "max_iterations": 0, - "algo": "multi_kernel" - }, - { - "itopk": 32, - "search_width": 1, - "max_iterations": 32, - "algo": "multi_kernel" - }, - { - "itopk": 64, - "search_width": 4, - "max_iterations": 16, - "algo": "multi_kernel" - }, - { - "itopk": 64, - "search_width": 1, - "max_iterations": 64, - "algo": "multi_kernel" - }, - { - "itopk": 96, - "search_width": 2, - "max_iterations": 48, - "algo": "multi_kernel" - }, - { - "itopk": 128, - "search_width": 8, - "max_iterations": 16, - "algo": "multi_kernel" - }, - { - "itopk": 128, - "search_width": 2, - "max_iterations": 64, - "algo": "multi_kernel" - }, - { - "itopk": 192, - "search_width": 8, - "max_iterations": 24, - "algo": "multi_kernel" - }, - { - "itopk": 192, - "search_width": 2, - "max_iterations": 96, - "algo": "multi_kernel" - }, - { - "itopk": 256, - "search_width": 8, - "max_iterations": 32, - "algo": "multi_kernel" - }, - { - "itopk": 384, - "search_width": 8, - "max_iterations": 48, - "algo": "multi_kernel" - }, - { - "itopk": 512, - "search_width": 8, - "max_iterations": 64, - "algo": "multi_kernel" - } - ], - "search_result_file": "result/deep-100M/raft_cagra/dim32" - }, - { - "name" : "raft_cagra.dim64", - "algo" : "raft_cagra", - "build_param": { - "index_dim": 64 - }, - "file": "index/deep-100M/raft_cagra/dim64", - "search_params" : [ - { - "itopk": 32, - "search_width": 1, - "max_iterations": 0, - "algo": "single_cta" - }, - { - "itopk": 32, - "search_width": 1, - "max_iterations": 32, - "algo": "single_cta" - }, - { - "itopk": 64, - "search_width": 4, - "max_iterations": 16, - "algo": "single_cta" - }, - { - "itopk": 64, - "search_width": 1, - "max_iterations": 64, - "algo": "single_cta" - }, - { - "itopk": 96, - "search_width": 2, - "max_iterations": 48, - "algo": "single_cta" - }, - { - "itopk": 128, - "search_width": 8, - "max_iterations": 16, - "algo": "single_cta" - }, - { - "itopk": 128, - "search_width": 2, - "max_iterations": 64, - "algo": "single_cta" - }, - { - "itopk": 192, - "search_width": 8, - "max_iterations": 24, - "algo": "single_cta" - }, - { - "itopk": 192, - "search_width": 2, - "max_iterations": 96, - "algo": "single_cta" - }, - { - "itopk": 256, - "search_width": 8, - "max_iterations": 32, - "algo": "single_cta" - }, - { - "itopk": 384, - "search_width": 8, - "max_iterations": 48, - "algo": "single_cta" - }, - { - "itopk": 512, - "search_width": 8, - "max_iterations": 64, - "algo": "single_cta" - }, - - { - "itopk": 32, - "search_width": 1, - "max_iterations": 0, - "algo": "multi_cta" - }, - { - "itopk": 32, - "search_width": 1, - "max_iterations": 32, - "algo": "multi_cta" - }, - { - "itopk": 64, - "search_width": 4, - "max_iterations": 16, - "algo": "multi_cta" - }, - { - "itopk": 64, - "search_width": 1, - "max_iterations": 64, - "algo": "multi_cta" - }, - { - "itopk": 96, - "search_width": 2, - "max_iterations": 48, - "algo": "multi_cta" - }, - { - "itopk": 128, - "search_width": 8, - "max_iterations": 16, - "algo": "multi_cta" - }, - { - "itopk": 128, - "search_width": 2, - "max_iterations": 64, - "algo": "multi_cta" - }, - { - "itopk": 192, - "search_width": 8, - "max_iterations": 24, - "algo": "multi_cta" - }, - { - "itopk": 192, - "search_width": 2, - "max_iterations": 96, - "algo": "multi_cta" - }, - { - "itopk": 256, - "search_width": 8, - "max_iterations": 32, - "algo": "multi_cta" - }, - { - "itopk": 384, - "search_width": 8, - "max_iterations": 48, - 
"algo": "multi_cta" - }, - { - "itopk": 512, - "search_width": 8, - "max_iterations": 64, - "algo": "multi_cta" - }, - - { - "itopk": 32, - "search_width": 1, - "max_iterations": 0, - "algo": "multi_kernel" - }, - { - "itopk": 32, - "search_width": 1, - "max_iterations": 32, - "algo": "multi_kernel" - }, - { - "itopk": 64, - "search_width": 4, - "max_iterations": 16, - "algo": "multi_kernel" - }, - { - "itopk": 64, - "search_width": 1, - "max_iterations": 64, - "algo": "multi_kernel" - }, - { - "itopk": 96, - "search_width": 2, - "max_iterations": 48, - "algo": "multi_kernel" - }, - { - "itopk": 128, - "search_width": 8, - "max_iterations": 16, - "algo": "multi_kernel" - }, - { - "itopk": 128, - "search_width": 2, - "max_iterations": 64, - "algo": "multi_kernel" - }, - { - "itopk": 192, - "search_width": 8, - "max_iterations": 24, - "algo": "multi_kernel" - }, - { - "itopk": 192, - "search_width": 2, - "max_iterations": 96, - "algo": "multi_kernel" - }, - { - "itopk": 256, - "search_width": 8, - "max_iterations": 32, - "algo": "multi_kernel" - }, - { - "itopk": 384, - "search_width": 8, - "max_iterations": 48, - "algo": "multi_kernel" - }, - { - "itopk": 512, - "search_width": 8, - "max_iterations": 64, - "algo": "multi_kernel" - } - ], - "search_result_file": "result/deep-100M/raft_cagra/dim64" + "name": "raft_cagra.dim64", + "algo": "raft_cagra", + "build_param": {"index_dim": 64}, + "file": "deep-100M/raft_cagra/dim64", + "search_params": [ + {"itopk": 32, "search_width": 1, "max_iterations": 0}, + {"itopk": 32, "search_width": 1, "max_iterations": 32}, + {"itopk": 64, "search_width": 4, "max_iterations": 16}, + {"itopk": 64, "search_width": 1, "max_iterations": 64}, + {"itopk": 96, "search_width": 2, "max_iterations": 48}, + {"itopk": 128, "search_width": 8, "max_iterations": 16}, + {"itopk": 128, "search_width": 2, "max_iterations": 64}, + {"itopk": 192, "search_width": 8, "max_iterations": 24}, + {"itopk": 192, "search_width": 2, "max_iterations": 96}, + {"itopk": 256, "search_width": 8, "max_iterations": 32}, + {"itopk": 384, "search_width": 8, "max_iterations": 48}, + {"itopk": 512, "search_width": 8, "max_iterations": 64} + ] } ] } diff --git a/cpp/bench/ann/conf/deep-1B.json b/cpp/bench/ann/conf/deep-1B.json index 50d1b87602..632d2f7308 100644 --- a/cpp/bench/ann/conf/deep-1B.json +++ b/cpp/bench/ann/conf/deep-1B.json @@ -1,25 +1,24 @@ { - "dataset" : { - "name" : "deep-1B", - "base_file" : "data/deep-1B/base.1B.fbin", - "query_file" : "data/deep-1B/query.public.10K.fbin", - // although distance should be "euclidean", faiss becomes much slower for that - "distance" : "inner_product" + "dataset": { + "name": "deep-1B", + "base_file": "deep-1B/base.1B.fbin", + "query_file": "deep-1B/query.public.10K.fbin", + "groundtruth_neighbors_file": "deep-1B/groundtruth.neighbors.ibin", + "distance": "inner_product" }, - "search_basic_param" : { - "batch_size" : 10000, - "k" : 10, - "run_count" : 2 + "search_basic_param": { + "batch_size": 10000, + "k": 10 }, - "index" : [ + "index": [ { - "name" : "faiss_ivf_pq.M48-nlist50K", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M48-nlist50K", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":50000, "M":48}, - "file" : "index/deep-1B/faiss_ivf_pq/M48-nlist50K", - "search_params" : [ + "file": "deep-1B/faiss_ivf_pq/M48-nlist50K", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -29,10 +28,7 @@ {"nprobe":500}, {"nprobe":1000}, {"nprobe":2000} - ], - "search_result_file" : "result/deep-1B/faiss_ivf_pq/M48-nlist50K" - }, - 
- + ] + } ] } diff --git a/cpp/bench/ann/conf/glove-100-inner.json b/cpp/bench/ann/conf/glove-100-inner.json index 5d0bbf970c..7c95ceb439 100644 --- a/cpp/bench/ann/conf/glove-100-inner.json +++ b/cpp/bench/ann/conf/glove-100-inner.json @@ -1,24 +1,24 @@ { - "dataset" : { - "name" : "glove-100-inner", - "base_file" : "data/glove-100-inner/base.fbin", - "query_file" : "data/glove-100-inner/query.fbin", - "distance" : "inner_product" + "dataset": { + "name": "glove-100-inner", + "base_file": "glove-100-inner/base.fbin", + "query_file": "glove-100-inner/query.fbin", + "groundtruth_neighbors_file": "glove-100-inner/groundtruth.neighbors.ibin", + "distance": "inner_product" }, - "search_basic_param" : { - "batch_size" : 1, - "k" : 10, - "run_count" : 3 + "search_basic_param": { + "batch_size": 1, + "k": 10 }, - "index" : [ + "index": [ { - "name" : "hnswlib.M4", - "algo" : "hnswlib", + "name": "hnswlib.M4", + "algo": "hnswlib", "build_param": {"M":4, "efConstruction":500, "numThreads":4}, - "file" : "index/glove-100-inner/hnswlib/M4", - "search_params" : [ + "file": "glove-100-inner/hnswlib/M4", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -28,16 +28,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/glove-100-inner/hnswlib/M4" + ] }, - { - "name" : "hnswlib.M8", - "algo" : "hnswlib", + "name": "hnswlib.M8", + "algo": "hnswlib", "build_param": {"M":8, "efConstruction":500, "numThreads":4}, - "file" : "index/glove-100-inner/hnswlib/M8", - "search_params" : [ + "file": "glove-100-inner/hnswlib/M8", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -47,16 +45,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/glove-100-inner/hnswlib/M8" + ] }, - { - "name" : "hnswlib.M12", - "algo" : "hnswlib", + "name": "hnswlib.M12", + "algo": "hnswlib", "build_param": {"M":12, "efConstruction":500, "numThreads":4}, - "file" : "index/glove-100-inner/hnswlib/M12", - "search_params" : [ + "file": "glove-100-inner/hnswlib/M12", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -66,16 +62,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/glove-100-inner/hnswlib/M12" + ] }, - { - "name" : "hnswlib.M16", - "algo" : "hnswlib", + "name": "hnswlib.M16", + "algo": "hnswlib", "build_param": {"M":16, "efConstruction":500, "numThreads":4}, - "file" : "index/glove-100-inner/hnswlib/M16", - "search_params" : [ + "file": "glove-100-inner/hnswlib/M16", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -85,16 +79,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/glove-100-inner/hnswlib/M16" + ] }, - { - "name" : "hnswlib.M24", - "algo" : "hnswlib", + "name": "hnswlib.M24", + "algo": "hnswlib", "build_param": {"M":24, "efConstruction":500, "numThreads":4}, - "file" : "index/glove-100-inner/hnswlib/M24", - "search_params" : [ + "file": "glove-100-inner/hnswlib/M24", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -104,16 +96,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : 
"result/glove-100-inner/hnswlib/M24" + ] }, - { - "name" : "hnswlib.M36", - "algo" : "hnswlib", + "name": "hnswlib.M36", + "algo": "hnswlib", "build_param": {"M":36, "efConstruction":500, "numThreads":4}, - "file" : "index/glove-100-inner/hnswlib/M36", - "search_params" : [ + "file": "glove-100-inner/hnswlib/M36", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -123,16 +113,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/glove-100-inner/hnswlib/M36" + ] }, - { - "name" : "hnswlib.M48", - "algo" : "hnswlib", + "name": "hnswlib.M48", + "algo": "hnswlib", "build_param": {"M":48, "efConstruction":500, "numThreads":4}, - "file" : "index/glove-100-inner/hnswlib/M48", - "search_params" : [ + "file": "glove-100-inner/hnswlib/M48", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -142,16 +130,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/glove-100-inner/hnswlib/M48" + ] }, - { - "name" : "hnswlib.M64", - "algo" : "hnswlib", + "name": "hnswlib.M64", + "algo": "hnswlib", "build_param": {"M":64, "efConstruction":500, "numThreads":4}, - "file" : "index/glove-100-inner/hnswlib/M64", - "search_params" : [ + "file": "glove-100-inner/hnswlib/M64", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -161,16 +147,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/glove-100-inner/hnswlib/M64" + ] }, - { - "name" : "hnswlib.M96", - "algo" : "hnswlib", + "name": "hnswlib.M96", + "algo": "hnswlib", "build_param": {"M":96, "efConstruction":500, "numThreads":4}, - "file" : "index/glove-100-inner/hnswlib/M96", - "search_params" : [ + "file": "glove-100-inner/hnswlib/M96", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -180,16 +164,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/glove-100-inner/hnswlib/M96" + ] }, - { - "name" : "faiss_ivf_flat.nlist1024", - "algo" : "faiss_gpu_ivf_flat", + "name": "faiss_ivf_flat.nlist1024", + "algo": "faiss_gpu_ivf_flat", "build_param": {"nlist":1024}, - "file" : "index/glove-100-inner/faiss_ivf_flat/nlist1024", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_flat/nlist1024", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -198,16 +180,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_flat/nlist1024" + ] }, - { - "name" : "faiss_ivf_flat.nlist2048", - "algo" : "faiss_gpu_ivf_flat", + "name": "faiss_ivf_flat.nlist2048", + "algo": "faiss_gpu_ivf_flat", "build_param": {"nlist":2048}, - "file" : "index/glove-100-inner/faiss_ivf_flat/nlist2048", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_flat/nlist2048", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -216,16 +196,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_flat/nlist2048" + ] }, - { - "name" : "faiss_ivf_flat.nlist4096", - "algo" : "faiss_gpu_ivf_flat", + "name": "faiss_ivf_flat.nlist4096", + "algo": "faiss_gpu_ivf_flat", "build_param": {"nlist":4096}, - "file" : 
"index/glove-100-inner/faiss_ivf_flat/nlist4096", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_flat/nlist4096", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -234,16 +212,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_flat/nlist4096" + ] }, - { - "name" : "faiss_ivf_flat.nlist8192", - "algo" : "faiss_gpu_ivf_flat", + "name": "faiss_ivf_flat.nlist8192", + "algo": "faiss_gpu_ivf_flat", "build_param": {"nlist":8192}, - "file" : "index/glove-100-inner/faiss_ivf_flat/nlist8192", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_flat/nlist8192", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -252,16 +228,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_flat/nlist8192" + ] }, - { - "name" : "faiss_ivf_flat.nlist16384", - "algo" : "faiss_gpu_ivf_flat", + "name": "faiss_ivf_flat.nlist16384", + "algo": "faiss_gpu_ivf_flat", "build_param": {"nlist":16384}, - "file" : "index/glove-100-inner/faiss_ivf_flat/nlist16384", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_flat/nlist16384", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -271,18 +245,17 @@ {"nprobe":500}, {"nprobe":1000}, {"nprobe":2000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_flat/nlist16384" + ] }, { - "name" : "faiss_ivf_pq.M2-nlist1024", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M2-nlist1024", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":1024, "M":2}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M2-nlist1024", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M2-nlist1024", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -291,16 +264,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M2-nlist1024" + ] }, - { - "name" : "faiss_ivf_pq.M2-nlist2048", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M2-nlist2048", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":2048, "M":2}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M2-nlist2048", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M2-nlist2048", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -309,16 +280,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M2-nlist2048" + ] }, - { - "name" : "faiss_ivf_pq.M2-nlist4096", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M2-nlist4096", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":4096, "M":2}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M2-nlist4096", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M2-nlist4096", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -327,16 +296,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M2-nlist4096" + ] }, - { - "name" : "faiss_ivf_pq.M2-nlist8192", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M2-nlist8192", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":8192, "M":2}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M2-nlist8192", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M2-nlist8192", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -345,16 +312,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : 
"result/glove-100-inner/faiss_ivf_pq/M2-nlist8192" + ] }, - { - "name" : "faiss_ivf_pq.M2-nlist16384", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M2-nlist16384", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":16384, "M":2}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M2-nlist16384", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M2-nlist16384", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -364,16 +329,14 @@ {"nprobe":500}, {"nprobe":1000}, {"nprobe":2000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M2-nlist16384" + ] }, - { - "name" : "faiss_ivf_pq.M4-nlist1024", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M4-nlist1024", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":1024, "M":4}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M4-nlist1024", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M4-nlist1024", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -382,16 +345,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M4-nlist1024" + ] }, - { - "name" : "faiss_ivf_pq.M4-nlist2048", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M4-nlist2048", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":2048, "M":4}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M4-nlist2048", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M4-nlist2048", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -400,16 +361,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M4-nlist2048" + ] }, - { - "name" : "faiss_ivf_pq.M4-nlist4096", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M4-nlist4096", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":4096, "M":4}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M4-nlist4096", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M4-nlist4096", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -418,16 +377,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M4-nlist4096" + ] }, - { - "name" : "faiss_ivf_pq.M4-nlist8192", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M4-nlist8192", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":8192, "M":4}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M4-nlist8192", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M4-nlist8192", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -436,16 +393,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M4-nlist8192" + ] }, - { - "name" : "faiss_ivf_pq.M4-nlist16384", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M4-nlist16384", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":16384, "M":4}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M4-nlist16384", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M4-nlist16384", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -455,16 +410,14 @@ {"nprobe":500}, {"nprobe":1000}, {"nprobe":2000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M4-nlist16384" + ] }, - { - "name" : "faiss_ivf_pq.M20-nlist1024", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M20-nlist1024", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":1024, "M":20}, - "file" : 
"index/glove-100-inner/faiss_ivf_pq/M20-nlist1024", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M20-nlist1024", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -473,16 +426,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M20-nlist1024" + ] }, - { - "name" : "faiss_ivf_pq.M20-nlist2048", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M20-nlist2048", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":2048, "M":20}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M20-nlist2048", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M20-nlist2048", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -491,16 +442,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M20-nlist2048" + ] }, - { - "name" : "faiss_ivf_pq.M20-nlist4096", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M20-nlist4096", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":4096, "M":20}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M20-nlist4096", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M20-nlist4096", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -509,16 +458,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M20-nlist4096" + ] }, - { - "name" : "faiss_ivf_pq.M20-nlist8192", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M20-nlist8192", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":8192, "M":20}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M20-nlist8192", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M20-nlist8192", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -527,16 +474,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M20-nlist8192" + ] }, - { - "name" : "faiss_ivf_pq.M20-nlist16384", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M20-nlist16384", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":16384, "M":20}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M20-nlist16384", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M20-nlist16384", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -546,17 +491,16 @@ {"nprobe":500}, {"nprobe":1000}, {"nprobe":2000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M20-nlist16384" + ] }, { - "name" : "faiss_ivf_sq.nlist1024-fp16", - "algo" : "faiss_gpu_ivf_sq", + "name": "faiss_ivf_sq.nlist1024-fp16", + "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist":1024, "quantizer_type":"fp16"}, - "file" : "index/glove-100-inner/faiss_ivf_sq/nlist1024-fp16", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_sq/nlist1024-fp16", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -565,16 +509,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist1024-fp16" + ] }, - { - "name" : "faiss_ivf_sq.nlist2048-fp16", - "algo" : "faiss_gpu_ivf_sq", + "name": "faiss_ivf_sq.nlist2048-fp16", + "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist":2048, "quantizer_type":"fp16"}, - "file" : "index/glove-100-inner/faiss_ivf_sq/nlist2048-fp16", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_sq/nlist2048-fp16", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -583,16 +525,14 
@@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist2048-fp16" + ] }, - { - "name" : "faiss_ivf_sq.nlist4096-fp16", - "algo" : "faiss_gpu_ivf_sq", + "name": "faiss_ivf_sq.nlist4096-fp16", + "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist":4096, "quantizer_type":"fp16"}, - "file" : "index/glove-100-inner/faiss_ivf_sq/nlist4096-fp16", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_sq/nlist4096-fp16", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -601,16 +541,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist4096-fp16" + ] }, - { - "name" : "faiss_ivf_sq.nlist8192-fp16", - "algo" : "faiss_gpu_ivf_sq", + "name": "faiss_ivf_sq.nlist8192-fp16", + "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist":8192, "quantizer_type":"fp16"}, - "file" : "index/glove-100-inner/faiss_ivf_sq/nlist8192-fp16", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_sq/nlist8192-fp16", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -619,16 +557,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist8192-fp16" + ] }, - { - "name" : "faiss_ivf_sq.nlist16384-fp16", - "algo" : "faiss_gpu_ivf_sq", + "name": "faiss_ivf_sq.nlist16384-fp16", + "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist":16384, "quantizer_type":"fp16"}, - "file" : "index/glove-100-inner/faiss_ivf_sq/nlist16384-fp16", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_sq/nlist16384-fp16", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -638,17 +574,14 @@ {"nprobe":500}, {"nprobe":1000}, {"nprobe":2000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist16384-fp16" + ] }, - - { - "name" : "faiss_ivf_sq.nlist1024-int8", - "algo" : "faiss_gpu_ivf_sq", + "name": "faiss_ivf_sq.nlist1024-int8", + "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist":1024, "quantizer_type":"int8"}, - "file" : "index/glove-100-inner/faiss_ivf_sq/nlist1024-int8", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_sq/nlist1024-int8", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -657,16 +590,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist1024-int8" + ] }, - { - "name" : "faiss_ivf_sq.nlist2048-int8", - "algo" : "faiss_gpu_ivf_sq", + "name": "faiss_ivf_sq.nlist2048-int8", + "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist":2048, "quantizer_type":"int8"}, - "file" : "index/glove-100-inner/faiss_ivf_sq/nlist2048-int8", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_sq/nlist2048-int8", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -675,16 +606,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist2048-int8" + ] }, - { - "name" : "faiss_ivf_sq.nlist4096-int8", - "algo" : "faiss_gpu_ivf_sq", + "name": "faiss_ivf_sq.nlist4096-int8", + "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist":4096, "quantizer_type":"int8"}, - "file" : "index/glove-100-inner/faiss_ivf_sq/nlist4096-int8", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_sq/nlist4096-int8", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -693,16 +622,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : 
"result/glove-100-inner/faiss_ivf_sq/nlist4096-int8" + ] }, - { - "name" : "faiss_ivf_sq.nlist8192-int8", - "algo" : "faiss_gpu_ivf_sq", + "name": "faiss_ivf_sq.nlist8192-int8", + "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist":8192, "quantizer_type":"int8"}, - "file" : "index/glove-100-inner/faiss_ivf_sq/nlist8192-int8", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_sq/nlist8192-int8", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -711,16 +638,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist8192-int8" + ] }, - { - "name" : "faiss_ivf_sq.nlist16384-int8", - "algo" : "faiss_gpu_ivf_sq", + "name": "faiss_ivf_sq.nlist16384-int8", + "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist":16384, "quantizer_type":"int8"}, - "file" : "index/glove-100-inner/faiss_ivf_sq/nlist16384-int8", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_sq/nlist16384-int8", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -730,22 +655,18 @@ {"nprobe":500}, {"nprobe":1000}, {"nprobe":2000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist16384-int8" + ] }, - { - "name" : "faiss_flat", - "algo" : "faiss_gpu_flat", + "name": "faiss_flat", + "algo": "faiss_gpu_flat", "build_param": {}, - "file" : "index/glove-100-inner/faiss_flat/flat", - "search_params" : [{}], - "search_result_file" : "result/glove-100-inner/faiss_flat/flat" + "file": "glove-100-inner/faiss_flat/flat", + "search_params": [{}] }, - { - "name" : "ggnn.kbuild96-segment64-refine2-k10", - "algo" : "ggnn", + "name": "ggnn.kbuild96-segment64-refine2-k10", + "algo": "ggnn", "build_param": { "k_build": 96, "segment_size": 64, @@ -753,8 +674,8 @@ "dataset_size": 1183514, "k": 10 }, - "file" : "index/glove-100-inner/ggnn/kbuild96-segment64-refine2-k10", - "search_params" : [ + "file": "glove-100-inner/ggnn/kbuild96-segment64-refine2-k10", + "search_params": [ {"tau":0.001, "block_dim":64, "sorted_size":32}, {"tau":0.005, "block_dim":64, "sorted_size":32}, {"tau":0.01, "block_dim":64, "sorted_size":32}, @@ -786,8 +707,7 @@ {"tau":0.3, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, {"tau":0.4, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, {"tau":0.5, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32} - - ], - "search_result_file" : "result/glove-100-inner/ggnn/kbuild96-segment64-refine2-k10" - }] + ] + } + ] } diff --git a/cpp/bench/ann/conf/sift-128-euclidean.json b/cpp/bench/ann/conf/sift-128-euclidean.json index c4b8905b1d..116ea8d557 100644 --- a/cpp/bench/ann/conf/sift-128-euclidean.json +++ b/cpp/bench/ann/conf/sift-128-euclidean.json @@ -1,22 +1,24 @@ { "dataset": { "name": "sift-128-euclidean", - "base_file": "/home/cjnolet/workspace/ann_data/sift-128-euclidean/base.fbin", - "query_file": "/home/cjnolet/workspace/ann_data/sift-128-euclidean/query.fbin", + "base_file": "sift-128-euclidean/base.fbin", + "query_file": "sift-128-euclidean/query.fbin", + "groundtruth_neighbors_file": "sift-128-euclidean/groundtruth.neighbors.ibin", "distance": "euclidean" }, + "search_basic_param": { "batch_size": 5000, - "k": 10, - "run_count": 3 + "k": 10 }, + "index": [ { - "name" : "hnswlib.M12", - "algo" : "hnswlib", + "name": "hnswlib.M12", + "algo": "hnswlib", "build_param": {"M":12, "efConstruction":500, "numThreads":32}, - "file" : "index/sift-128-euclidean/hnswlib/M12", - "search_params" : [ + "file": 
"sift-128-euclidean/hnswlib/M12", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -27,15 +29,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/sift-128-euclidean/hnswlib/M12" + ] }, { - "name" : "hnswlib.M16", - "algo" : "hnswlib", + "name": "hnswlib.M16", + "algo": "hnswlib", "build_param": {"M":16, "efConstruction":500, "numThreads":32}, - "file" : "index/sift-128-euclidean/hnswlib/M16", - "search_params" : [ + "file": "sift-128-euclidean/hnswlib/M16", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -46,15 +47,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/sift-128-euclidean/hnswlib/M16" + ] }, { - "name" : "hnswlib.M24", - "algo" : "hnswlib", + "name": "hnswlib.M24", + "algo": "hnswlib", "build_param": {"M":24, "efConstruction":500, "numThreads":32}, - "file" : "index/sift-128-euclidean/hnswlib/M24", - "search_params" : [ + "file": "sift-128-euclidean/hnswlib/M24", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -65,15 +65,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/sift-128-euclidean/hnswlib/M24" + ] }, { - "name" : "hnswlib.M36", - "algo" : "hnswlib", + "name": "hnswlib.M36", + "algo": "hnswlib", "build_param": {"M":36, "efConstruction":500, "numThreads":32}, - "file" : "index/sift-128-euclidean/hnswlib/M36", - "search_params" : [ + "file": "sift-128-euclidean/hnswlib/M36", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -84,234 +83,109 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/sift-128-euclidean/hnswlib/M36" + ] }, - - - - { "name": "raft_bfknn", "algo": "raft_bfknn", "build_param": {}, - "file": "index/sift-128-euclidean/raft_bfknn/bfknn", - "search_params": [ - { - "probe": 1 - } - ], - "search_result_file": "result/sift-128-euclidean/raft_bfknn/bfknn" + "file": "sift-128-euclidean/raft_bfknn/bfknn", + "search_params": [{"probe": 1}] }, { "name": "faiss_ivf_flat.nlist1024", "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 1024 - }, - "file": "index/sift-128-euclidean/faiss_ivf_flat/nlist1024", + "build_param": {"nlist": 1024}, + "file": "sift-128-euclidean/faiss_ivf_flat/nlist1024", "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_flat/nlist1024" + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] }, { "name": "faiss_ivf_flat.nlist2048", "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 2048 - }, - "file": "index/sift-128-euclidean/faiss_ivf_flat/nlist2048", + "build_param": {"nlist": 2048}, + "file": "sift-128-euclidean/faiss_ivf_flat/nlist2048", "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": 
"result/sift-128-euclidean/faiss_ivf_flat/nlist2048" + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] }, { "name": "faiss_ivf_flat.nlist4096", "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 4096 - }, - "file": "index/sift-128-euclidean/faiss_ivf_flat/nlist4096", + "build_param": {"nlist": 4096}, + "file": "sift-128-euclidean/faiss_ivf_flat/nlist4096", "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_flat/nlist4096" + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] }, { "name": "faiss_ivf_flat.nlist8192", "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 8192 - }, - "file": "index/sift-128-euclidean/faiss_ivf_flat/nlist8192", + "build_param": {"nlist": 8192}, + "file": "sift-128-euclidean/faiss_ivf_flat/nlist8192", "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_flat/nlist8192" + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] }, { "name": "faiss_ivf_flat.nlist16384", "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 16384 - }, - "file": "index/sift-128-euclidean/faiss_ivf_flat/nlist16384", + "build_param": {"nlist": 16384}, + "file": "sift-128-euclidean/faiss_ivf_flat/nlist16384", "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_flat/nlist16384" + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000}, + {"nprobe": 2000} + ] }, { "name": "faiss_ivf_pq.M64-nlist1024", "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": true - }, - "file": "index/sift-128-euclidean/faiss_ivf_pq/M64-nlist1024", + "build_param": {"nlist": 1024, "M": 64, "useFloat16": true, "usePrecomputed": true}, + "file": "sift-128-euclidean/faiss_ivf_pq/M64-nlist1024", "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_pq/M64-nlist1024" + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] }, { "name": "faiss_ivf_pq.M64-nlist1024.noprecomp", @@ -322,1030 +196,303 @@ "useFloat16": true, "usePrecomputed": false }, - "file": "index/sift-128-euclidean/faiss_ivf_pq/M64-nlist1024.noprecomp", + "file": "sift-128-euclidean/faiss_ivf_pq/M64-nlist1024.noprecomp", "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - 
"nprobe": 1000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_pq/M64-nlist1024" + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] }, { "name": "faiss_ivf_sq.nlist1024-fp16", "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 1024, - "quantizer_type": "fp16" - }, - "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist1024-fp16", + "build_param": {"nlist": 1024, "quantizer_type": "fp16"}, + "file": "sift-128-euclidean/faiss_ivf_sq/nlist1024-fp16", "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist1024-fp16" + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] }, { "name": "faiss_ivf_sq.nlist2048-fp16", "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 2048, - "quantizer_type": "fp16" - }, - "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist2048-fp16", + "build_param": {"nlist": 2048, "quantizer_type": "fp16"}, + "file": "sift-128-euclidean/faiss_ivf_sq/nlist2048-fp16", "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist2048-fp16" + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] }, { "name": "faiss_ivf_sq.nlist4096-fp16", "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 4096, - "quantizer_type": "fp16" - }, - "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist4096-fp16", + "build_param": {"nlist": 4096, "quantizer_type": "fp16"}, + "file": "sift-128-euclidean/faiss_ivf_sq/nlist4096-fp16", "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist4096-fp16" + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] }, { "name": "faiss_ivf_sq.nlist8192-fp16", "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 8192, - "quantizer_type": "fp16" - }, - "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist8192-fp16", + "build_param": {"nlist": 8192, "quantizer_type": "fp16"}, + "file": "sift-128-euclidean/faiss_ivf_sq/nlist8192-fp16", "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist8192-fp16" + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] }, { "name": "faiss_ivf_sq.nlist16384-fp16", "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 16384, - "quantizer_type": "fp16" - }, - "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist16384-fp16", + "build_param": {"nlist": 
16384, "quantizer_type": "fp16"}, + "file": "sift-128-euclidean/faiss_ivf_sq/nlist16384-fp16", "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist16384-fp16" + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000}, + {"nprobe": 2000} + ] }, { "name": "faiss_ivf_sq.nlist1024-int8", "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 1024, - "quantizer_type": "int8" - }, - "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist1024-int8", + "build_param": {"nlist": 1024, "quantizer_type": "int8"}, + "file": "sift-128-euclidean/faiss_ivf_sq/nlist1024-int8", "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist1024-int8" + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] }, { "name": "faiss_ivf_sq.nlist2048-int8", "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 2048, - "quantizer_type": "int8" - }, - "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist2048-int8", + "build_param": {"nlist": 2048,"quantizer_type": "int8"}, + "file": "sift-128-euclidean/faiss_ivf_sq/nlist2048-int8", "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist2048-int8" + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] }, { "name": "faiss_ivf_sq.nlist4096-int8", "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 4096, - "quantizer_type": "int8" - }, - "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist4096-int8", + "build_param": {"nlist": 4096, "quantizer_type": "int8"}, + "file": "sift-128-euclidean/faiss_ivf_sq/nlist4096-int8", "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist4096-int8" + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] }, { "name": "faiss_ivf_sq.nlist8192-int8", "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 8192, - "quantizer_type": "int8" - }, - "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist8192-int8", + "build_param": {"nlist": 8192, "quantizer_type": "int8"}, + "file": "sift-128-euclidean/faiss_ivf_sq/nlist8192-int8", "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist8192-int8" + {"nprobe": 1}, + 
{"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] }, { "name": "faiss_ivf_sq.nlist16384-int8", "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 16384, - "quantizer_type": "int8" - }, - "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist16384-int8", + "build_param": {"nlist": 16384, "quantizer_type": "int8"}, + "file": "sift-128-euclidean/faiss_ivf_sq/nlist16384-int8", "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist16384-int8" + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000}, + {"nprobe": 2000} + ] }, { "name": "faiss_flat", "algo": "faiss_gpu_flat", "build_param": {}, - "file": "index/sift-128-euclidean/faiss_flat/flat", - "search_params": [ - {} - ], - "search_result_file": "result/sift-128-euclidean/faiss_flat/flat" - }, - - { - "name": "raft_ivf_pq.dimpq128-cluster1024", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024" + "file": "sift-128-euclidean/faiss_flat/flat", + "search_params": [{}] }, { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", + "name": "raft_ivf_pq.dimpq64-bitpq8-cluster1K", "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-float", + "build_param": {"niter": 25, "nlist": 1000, "pq_dim": 64, "pq_bits": 8, "ratio": 1}, + "file": "sift-128-euclidean/raft_ivf_pq/dimpq64-bitpq8-cluster1K", "search_params": [ - { - "k": 10, - "numProbes": 1, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 5, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": 
"float", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-float" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", + { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 1000, "internalDistanceDtype": "half", "smemLutDtype": "half" } + ] + }, + { + "name": "raft_ivf_pq.dimpq128-bitpq6-cluster1K", "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-half", + "build_param": {"niter": 25, "nlist": 1000, "pq_dim": 128, "pq_bits": 6, "ratio": 1}, + "file": "sift-128-euclidean/raft_ivf_pq/dimpq128-bitpq6-cluster1K", "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - 
"internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-half", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 32, - "ratio": 1, - "niter": 25 - }, - "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq32-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": 
"result/sift-128-euclidean/raft_ivf_pq/dimpq32-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 16, - "ratio": 1, - "niter": 25 - }, - "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq16-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq16-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024-half-float", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024-half-float" - }, - { - "name": "raft_ivf_pq.dimpq512-cluster1024-float-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 512, - "ratio": 1, - "niter": 25 - }, - "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq512-cluster1024-float-float", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq512-cluster1024-float-float" + { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 500, "internalDistanceDtype": "float", 
"smemLutDtype": "float" }, + { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 1000, "internalDistanceDtype": "half", "smemLutDtype": "half" } + ] }, { "name": "raft_ivf_flat.nlist1024", "algo": "raft_ivf_flat", - "build_param": { - "nlist": 1024, - "ratio": 1, - "niter": 25 - }, - "file": "index/sift-128-euclidean/raft_ivf_flat/nlist1024", + "build_param": {"nlist": 1024, "ratio": 1, "niter": 25}, + "file": "sift-128-euclidean/raft_ivf_flat/nlist1024", "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/sift-128-euclidean/raft_ivf_flat/nlist1024" + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] }, { "name": "raft_ivf_flat.nlist16384", "algo": "raft_ivf_flat", - "build_param": { - "nlist": 16384, - "ratio": 2, - "niter": 20 - }, - "file": "index/sift-128-euclidean/raft_ivf_flat/nlist16384", + "build_param": {"nlist": 16384, "ratio": 2, "niter": 20}, + "file": "sift-128-euclidean/raft_ivf_flat/nlist16384", + "search_params": [ + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000}, + {"nprobe": 2000} + ] + }, + { + "name": "raft_cagra.dim32", + "algo": "raft_cagra", + "build_param": {"index_dim": 32}, + "file": "sift-128-euclidean/raft_cagra/dim32", "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/sift-128-euclidean/raft_ivf_flat/nlist16384" - }, - - { - "name" : "raft_cagra.dim32", - "algo" : "raft_cagra", - "build_param": { - "index_dim" : 32 - }, - "file" : "index/sift-128-euclidean/raft_cagra/dim32", - "search_params" : [ {"itopk": 32}, {"itopk": 64}, {"itopk": 128} - ], - "search_result_file" : "result/sift-128-euclidean/raft_cagra/dim32" + ] }, - { - "name" : "raft_cagra.dim64", - "algo" : "raft_cagra", - "build_param": { - "index_dim" : 64 - }, - "file" : 
"index/sift-128-euclidean/raft_cagra/dim64", - "search_params" : [ + "name": "raft_cagra.dim64", + "algo": "raft_cagra", + "build_param": {"index_dim": 64}, + "file": "sift-128-euclidean/raft_cagra/dim64", + "search_params": [ {"itopk": 32}, {"itopk": 64}, {"itopk": 128} - ], - "search_result_file" : "result/sift-128-euclidean/raft_cagra/dim64" + ] } ] } diff --git a/cpp/bench/ann/scripts/eval.pl b/cpp/bench/ann/scripts/eval.pl deleted file mode 100755 index 81c5563d79..0000000000 --- a/cpp/bench/ann/scripts/eval.pl +++ /dev/null @@ -1,430 +0,0 @@ -#!/usr/bin/perl - -# ============================================================================= -# Copyright (c) 2020-2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. - -use warnings; -use strict; -use autodie qw(open close); -use File::Find; -use Getopt::Std; - -my $QPS = 'QPS'; -my $AVG_LATENCY = 'avg_latency(ms)'; -my $P99_LATENCY = 'p99_latency(ms)'; -my $P999_LATENCY = 'p999_latency(ms)'; -my @CONDITIONS = ([$QPS, 2000], ['recall', 0.9], ['recall', 0.95]); - - -my $USAGE = << 'END'; -usage: [-f] [-l avg|p99|p999] [-o output.csv] groundtruth.neighbors.ibin result_paths... - result_paths... are paths to the search result files. - Can specify multiple paths. - For each of them, if it's a directory, all the .txt files found under - it recursively will be regarded as inputs. - - -f: force to recompute recall and update it in result file if needed - -l: output search latency rather than QPS. Available options: - "avg" for average latency; - "p99" for 99th percentile latency; - "p999" for 99.9th percentile latency. - -o: also write result to a csv file -END - - -my %opt; -getopts('fl:o:', \%opt) - or die $USAGE; -my $force_calc_recall = exists $opt{f} ? 1 : 0; -my $csv_file; -$csv_file = $opt{o} if exists $opt{o}; -my $metric = $QPS; -if (exists $opt{l}) { - my $option = $opt{l}; - if ($option eq 'avg') { - $metric = $AVG_LATENCY; - } - elsif ($option eq 'p99') { - $metric = $P99_LATENCY; - } - elsif ($option eq 'p999') { - $metric = $P999_LATENCY; - } - else { - die - "[error] illegal value for '-l': '$option'. Must be 'avg', 'p99' or 'p999'\n"; - } -} - -@ARGV >= 2 - or die $USAGE; - - -my $truth_file = shift @ARGV; -my ($k, $dataset, $distance, $results) = get_all_results($metric, @ARGV); -if (!defined $k) { - print STDERR "no result file found\n"; - exit -1; -} -print STDERR "dataset = $dataset, distance = $distance, k = $k\n\n"; -calc_missing_recall($results, $truth_file, $force_calc_recall); - -my @results = sort { - $a->{name} cmp $b->{name} - or $a->{recall} <=> $b->{recall} - or $b->{qps} <=> $a->{qps} -} @$results; -printf("%-60s %6s %16s %s\n", '', 'Recall', $metric, 'search_param'); -for my $result (@results) { - my $fmt = ($metric eq $QPS) ? 
'%16.1f' : '%16.3f'; - my $qps = $result->{qps}; - $qps *= 1000 if $metric ne $QPS; # the unit of latency is ms - printf("%-60s %6.4f ${fmt} %s\n", - $result->{name}, $result->{recall}, $qps, $result->{search_param}); -} -if (defined $csv_file) { - open my $fh, '>', $csv_file; - print {$fh} ",Recall,${metric},search_param\n"; - for my $result (@results) { - my $qps = $result->{qps}; - $qps *= 1000 if $metric ne $QPS; - printf {$fh} ( - "%s,%.4f,%.3f,%s\n", $result->{name}, $result->{recall}, - $qps, $result->{search_param} - ); - } -} -print "\n"; -calc_and_print_estimation($results, $metric, \@CONDITIONS); - - - - -sub read_result { - my ($fname) = @_; - open my $fh, '<', $fname; - my %attr; - while (<$fh>) { - chomp; - next if /^\s*$/; - my $pos = index($_, ':'); - $pos != -1 - or die "[error] no ':' is found: '$_'\n"; - my $key = substr($_, 0, $pos); - my $val = substr($_, $pos + 1); - $key =~ s/^\s+|\s+$//g; - $val =~ s/^\s+|\s+$//g; - - # old version benchmark compatible - if ($key eq 'search_time') { - $key = 'average_search_time'; - $val *= $attr{batch_size}; - } - $attr{$key} = $val; - } - return \%attr; -} - -sub overwrite_recall_to_result { - my ($fname, $recall) = @_; - open my $fh_in, '<', $fname; - $recall = sprintf("%f", $recall); - my $out; - while (<$fh_in>) { - s/^recall: .*/recall: $recall/; - $out .= $_; - } - close $fh_in; - - open my $fh_out, '>', $fname; - print {$fh_out} $out; -} - -sub append_recall_to_result { - my ($fname, $recall) = @_; - open my $fh, '>>', $fname; - printf {$fh} ("recall: %f\n", $recall); -} - -sub get_all_results { - my ($metric) = shift @_; - - my %fname; - my $wanted = sub { - if (-f && /\.txt$/) { - $fname{$File::Find::name} = 1; - } - }; - find($wanted, @_); - - my $k; - my $dataset; - my $distance; - my @results; - for my $f (sort keys %fname) { - print STDERR "reading $f ...\n"; - my $attr = read_result($f); - if (!defined $k) { - $k = $attr->{k}; - $dataset = $attr->{dataset}; - $distance = $attr->{distance}; - } - else { - $attr->{k} eq $k - or die "[error] k should be $k, but is $attr->{k} in $f\n"; - $attr->{dataset} eq $dataset - or die - "[error] dataset should be $dataset, but is $attr->{dataset} in $f\n"; - $attr->{distance} eq $distance - or die - "[error] distance should be $distance, but is $attr->{distance} in $f\n"; - } - - my $batch_size = $attr->{batch_size}; - $batch_size =~ s/000000$/M/; - $batch_size =~ s/000$/K/; - my $search_param = $attr->{search_param}; - $search_param =~ s/^{//; - $search_param =~ s/}$//; - $search_param =~ s/,/ /g; - $search_param =~ s/"//g; - - my $qps; - if ($metric eq $QPS) { - $qps = $attr->{batch_size} / $attr->{average_search_time}; - } - elsif ($metric eq $AVG_LATENCY) { - $qps = $attr->{average_search_time}; - } - elsif ($metric eq $P99_LATENCY) { - exists $attr->{p99_search_time} - or die "[error] p99_search_time is not found\n"; - $qps = $attr->{p99_search_time}; - } - elsif ($metric eq $P999_LATENCY) { - exists $attr->{p999_search_time} - or die "[error] p999_search_time is not found\n"; - $qps = $attr->{p999_search_time}; - } - else { - die "[error] unknown latency type: '$metric'\n"; - } - my $result = { - file => $f, - name => "$attr->{name}-batch${batch_size}", - search_param => $search_param, - qps => $qps, - }; - - if (exists $attr->{recall}) { - $result->{recall} = $attr->{recall}; - } - push @results, $result; - } - return $k, $dataset, $distance, \@results; -} - -sub read_ibin { - my ($fname) = @_; - - open my $fh, '<:raw', $fname; - my $raw; - - read($fh, $raw, 8); - my 
($nrows, $dim) = unpack('LL', $raw); - - my $expected_size = 8 + $nrows * $dim * 4; - my $size = (stat($fh))[7]; - $size == $expected_size - or die( - "[error] expected size is $expected_size, but actual size is $size\n"); - - read($fh, $raw, $nrows * $dim * 4) == $nrows * $dim * 4 - or die "[error] read $fname failed\n"; - my @data = unpack('l' x ($nrows * $dim), $raw); - return \@data, $nrows, $dim; -} - -sub pick_k_neighbors { - my ($neighbors, $nrows, $ncols, $k) = @_; - - my @res; - for my $i (0 .. $nrows - 1) { - my %neighbor_set; - for my $j (0 .. $k - 1) { - $neighbor_set{$neighbors->[$i * $ncols + $j]} = 1; - } - push @res, \%neighbor_set; - } - return \@res; -} - - -sub calc_recall { - my ($truth_k_neighbors, $result_neighbors, $nrows, $k) = @_; - - my $recall = 0; - for my $i (0 .. $nrows - 1) { - my $tp = 0; - for my $j (0 .. $k - 1) { - my $neighbor = $result_neighbors->[$i * $k + $j]; - ++$tp if exists $truth_k_neighbors->[$i]{$neighbor}; - } - $recall += $tp; - } - return $recall / $k / $nrows; -} - -sub calc_missing_recall { - my ($results, $truth_file, $force_calc_recall) = @_; - - my $need_calc_recall = grep { !exists $_->{recall} } @$results; - return unless $need_calc_recall || $force_calc_recall; - - my ($truth_neighbors, $nrows, $truth_k) = read_ibin($truth_file); - $truth_k >= $k - or die "[error] ground truth k ($truth_k) < k($k)\n"; - my $truth_k_neighbors = - pick_k_neighbors($truth_neighbors, $nrows, $truth_k, $k); - - for my $result (@$results) { - next if exists $result->{recall} && !$force_calc_recall; - - my $result_bin_file = $result->{file}; - $result_bin_file =~ s/txt$/ibin/; - print STDERR "calculating recall for $result_bin_file ...\n"; - my ($result_neighbors, $result_nrows, $result_k) = - read_ibin($result_bin_file); - $result_k == $k - or die - "[error] k should be $k, but is $result_k in $result_bin_file\n"; - $result_nrows == $nrows - or die - "[error] #row should be $nrows, but is $result_nrows in $result_bin_file\n"; - - my $recall = - calc_recall($truth_k_neighbors, $result_neighbors, $nrows, $k); - if (exists $result->{recall}) { - my $new_value = sprintf("%f", $recall); - if ($result->{recall} ne $new_value) { - print "update recall: $result->{recall} -> $new_value\n"; - overwrite_recall_to_result($result->{file}, $recall); - } - } - else { - append_recall_to_result($result->{file}, $recall); - } - $result->{recall} = $recall; - } -} - - -sub estimate { - my ($results, $condition, $value) = @_; - my %point_of; - for my $result (@$results) { - my $point; - if ($condition eq 'recall') { - $point = [$result->{recall}, $result->{qps}]; - } - else { - $point = [$result->{qps}, $result->{recall}]; - } - push @{$point_of{$result->{name}}}, $point; - } - - my @names = sort keys %point_of; - my @result; - for my $name (@names) { - my @points = sort { $a->[0] <=> $b->[0] } @{$point_of{$name}}; - if ($value < $points[0][0] || $value > $points[$#points][0]) { - push @result, -1; - next; - } - elsif ($value == $points[0][0]) { - push @result, $points[0][1]; - next; - } - - for my $i (1 .. $#points) { - if ($points[$i][0] >= $value) { - push @result, - linear_interpolation($value, @{$points[$i - 1]}, - @{$points[$i]}); - last; - } - } - } - return \@names, \@result; -} - -sub linear_interpolation { - my ($x, $x1, $y1, $x2, $y2) = @_; - return $y1 + ($x - $x1) * ($y2 - $y1) / ($x2 - $x1); -} - -sub merge { - my ($all, $new, $scale) = @_; - @$all == @$new - or die "[error] length is not equal\n"; - for my $i (0 .. 
@$all - 1) { - push @{$all->[$i]}, $new->[$i] * $scale; - } -} - -sub calc_and_print_estimation { - my ($results, $metric, $conditions) = @_; - - my @conditions = grep { - my $target = $_->[0]; - if ($target eq 'recall' || $target eq $metric) { - 1; - } - else { - $target eq $QPS - || $target eq $AVG_LATENCY - || $target eq $P99_LATENCY - || $target eq $P999_LATENCY - or die "[error] unknown condition: '$target'\n"; - 0; - } - } @$conditions; - - my @headers = map { - my $header; - if ($_->[0] eq 'recall') { - $header = $metric . '@recall' . $_->[1]; - } - elsif ($_->[0] eq $metric) { - $header = 'recall@' . $metric . $_->[1]; - } - $header; - } @conditions; - - my $scale = ($metric eq $QPS) ? 1 : 1000; - my $estimations; - for my $condition (@conditions) { - my ($names, $estimate) = estimate($results, @$condition); - if (!defined $estimations) { - @$estimations = map { [$_] } @$names; - } - merge($estimations, $estimate, $scale); - } - - my $fmt = "%-60s" . (" %16s" x @headers) . "\n"; - printf($fmt, '', @headers); - $fmt =~ s/16s/16.4f/g; - for (@$estimations) { - printf($fmt, @$_); - } -} diff --git a/cpp/bench/ann/src/common/ann_types.hpp b/cpp/bench/ann/src/common/ann_types.hpp index 8f73896e07..828731c3b3 100644 --- a/cpp/bench/ann/src/common/ann_types.hpp +++ b/cpp/bench/ann/src/common/ann_types.hpp @@ -17,40 +17,62 @@ */ #pragma once + +#include #include #include -#include +#include // cudaStream_t namespace raft::bench::ann { -enum class Metric { - kInnerProduct, - kEuclidean, -}; - enum class MemoryType { Host, HostMmap, Device, }; +enum class Metric { + kInnerProduct, + kEuclidean, +}; + +inline auto parse_metric(const std::string& metric_str) -> Metric +{ + if (metric_str == "inner_product") { + return raft::bench::ann::Metric::kInnerProduct; + } else if (metric_str == "euclidean") { + return raft::bench::ann::Metric::kEuclidean; + } else { + throw std::runtime_error("invalid metric: '" + metric_str + "'"); + } +} + struct AlgoProperty { MemoryType dataset_memory_type; // neighbors/distances should have same memory type as queries MemoryType query_memory_type; - bool need_dataset_when_search; +}; + +class AnnBase { + public: + inline AnnBase(Metric metric, int dim) : metric_(metric), dim_(dim) {} + virtual ~AnnBase() = default; + + protected: + Metric metric_; + int dim_; }; template -class ANN { +class ANN : public AnnBase { public: struct AnnSearchParam { virtual ~AnnSearchParam() = default; + [[nodiscard]] virtual auto needs_dataset() const -> bool { return false; }; }; - ANN(Metric metric, int dim) : metric_(metric), dim_(dim) {} - virtual ~ANN() = default; + inline ANN(Metric metric, int dim) : AnnBase(metric, dim) {} virtual void build(const T* dataset, size_t nrow, cudaStream_t stream = 0) = 0; @@ -79,10 +101,14 @@ class ANN { // The client code should call set_search_dataset() before searching, // and should not release dataset before searching is finished. 
virtual void set_search_dataset(const T* /*dataset*/, size_t /*nrow*/){}; - - protected: - Metric metric_; - int dim_; }; } // namespace raft::bench::ann + +#define REGISTER_ALGO_INSTANCE(DataT) \ + template auto raft::bench::ann::create_algo( \ + const std::string&, const std::string&, int, const nlohmann::json&, const std::vector&) \ + ->std::unique_ptr>; \ + template auto raft::bench::ann::create_search_param(const std::string&, \ + const nlohmann::json&) \ + ->std::unique_ptr::AnnSearchParam>; diff --git a/cpp/bench/ann/src/common/benchmark.cpp b/cpp/bench/ann/src/common/benchmark.cpp new file mode 100644 index 0000000000..c73f2ed22a --- /dev/null +++ b/cpp/bench/ann/src/common/benchmark.cpp @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "ann_types.hpp" + +#define JSON_DIAGNOSTICS 1 +#include + +#include +#include + +#include +#include + +namespace raft::bench::ann { + +struct lib_handle { + void* handle{nullptr}; + explicit lib_handle(const std::string& name) + { + handle = dlopen(name.c_str(), RTLD_LAZY | RTLD_LOCAL); + if (handle == nullptr) { + auto error_msg = "Failed to load " + name; + auto err = dlerror(); + if (err != nullptr && err[0] != '\0') { error_msg += ": " + std::string(err); } + throw std::runtime_error(error_msg); + } + } + ~lib_handle() noexcept + { + if (handle != nullptr) { dlclose(handle); } + } +}; + +auto load_lib(const std::string& algo) -> void* +{ + static std::unordered_map libs{}; + auto found = libs.find(algo); + + if (found != libs.end()) { return found->second.handle; } + auto lib_name = "lib" + algo + "_ann_bench.so"; + return libs.emplace(algo, lib_name).first->second.handle; +} + +auto get_fun_name(void* addr) -> std::string +{ + Dl_info dl_info; + if (dladdr(addr, &dl_info) != 0) { + if (dl_info.dli_sname != nullptr && dl_info.dli_sname[0] != '\0') { + return std::string{dl_info.dli_sname}; + } + } + throw std::logic_error("Failed to find out name of the looked up function"); +} + +template +auto create_algo(const std::string& algo, + const std::string& distance, + int dim, + const nlohmann::json& conf, + const std::vector& dev_list) -> std::unique_ptr> +{ + static auto fname = get_fun_name(reinterpret_cast(&create_algo)); + auto handle = load_lib(algo); + auto fun_addr = dlsym(handle, fname.c_str()); + if (fun_addr == nullptr) { + throw std::runtime_error("Couldn't load the create_algo function (" + algo + ")"); + } + auto fun = reinterpret_cast)>(fun_addr); + return fun(algo, distance, dim, conf, dev_list); +} + +template +std::unique_ptr::AnnSearchParam> create_search_param( + const std::string& algo, const nlohmann::json& conf) +{ + static auto fname = get_fun_name(reinterpret_cast(&create_search_param)); + auto handle = load_lib(algo); + auto fun_addr = dlsym(handle, fname.c_str()); + if (fun_addr == nullptr) { + throw std::runtime_error("Couldn't load the create_search_param function (" + algo + ")"); + } + auto fun = reinterpret_cast)>(fun_addr); + 
return fun(algo, conf); +} + +}; // namespace raft::bench::ann + +REGISTER_ALGO_INSTANCE(float); +REGISTER_ALGO_INSTANCE(std::int8_t); +REGISTER_ALGO_INSTANCE(std::uint8_t); + +#include "benchmark.hpp" + +int main(int argc, char** argv) { return raft::bench::ann::run_main(argc, argv); } diff --git a/cpp/bench/ann/src/common/benchmark.hpp b/cpp/bench/ann/src/common/benchmark.hpp index 28df4640ee..97f23838a9 100644 --- a/cpp/bench/ann/src/common/benchmark.hpp +++ b/cpp/bench/ann/src/common/benchmark.hpp @@ -13,594 +13,506 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#ifdef NVTX -#include -#endif -#include +#pragma once + +#include "ann_types.hpp" +#include "conf.hpp" +#include "cuda_stub.hpp" +#include "dataset.hpp" +#include "util.hpp" + +#include #include #include #include #include -#include #include #include #include #include -#include +#include #include -#include - -#include "benchmark_util.hpp" -#include "conf.h" -#include "dataset.h" -#include "util.h" +namespace raft::bench::ann { -using std::cerr; -using std::cout; -using std::endl; -using std::string; -using std::to_string; -using std::unordered_set; -using std::vector; +static inline std::unique_ptr current_algo{nullptr}; -namespace raft::bench::ann { +using kv_series = std::vector>>; -inline bool check_file_exist(const std::vector& files) +inline auto apply_overrides(const std::vector& configs, + const kv_series& overrides, + std::size_t override_idx = 0) -> std::vector { - bool ret = true; - std::unordered_set processed; - for (const auto& file : files) { - if (processed.find(file) == processed.end() && !file_exists(file)) { - log_error("file '%s' doesn't exist or is not a regular file", file.c_str()); - ret = false; + std::vector results{}; + if (override_idx >= overrides.size()) { + auto n = configs.size(); + for (size_t i = 0; i < n; i++) { + auto c = configs[i]; + c["override_suffix"] = n > 1 ? 
"/" + std::to_string(i) : ""; + results.push_back(c); } - processed.insert(file); + return results; } - return ret; -} - -inline bool check_file_not_exist(const std::vector& files, bool force_overwrite) -{ - bool ret = true; - for (const auto& file : files) { - if (file_exists(file)) { - if (force_overwrite) { - log_warn("'%s' already exists, will overwrite it", file.c_str()); - } else { - log_error("'%s' already exists, use '-f' to force overwriting", file.c_str()); - ret = false; + auto rec_configs = apply_overrides(configs, overrides, override_idx + 1); + auto [key, vals] = overrides[override_idx]; + auto n = vals.size(); + for (size_t i = 0; i < n; i++) { + const auto& val = vals[i]; + for (auto rc : rec_configs) { + if (n > 1) { + rc["override_suffix"] = + static_cast(rc["override_suffix"]) + "/" + std::to_string(i); } + rc[key] = val; + results.push_back(rc); } } - return ret; + return results; } -inline bool check_no_duplicate_file(const std::vector& files) +inline auto apply_overrides(const nlohmann::json& config, + const kv_series& overrides, + std::size_t override_idx = 0) { - bool ret = true; - std::unordered_set processed; - for (const auto& file : files) { - if (processed.find(file) != processed.end()) { - log_error("'%s' occurs more than once as output file, would be overwritten", file.c_str()); - ret = false; - } - processed.insert(file); - } - return ret; + return apply_overrides(std::vector{config}, overrides, 0); } -inline bool mkdir(const std::vector& dirs) +inline void dump_parameters(::benchmark::State& state, nlohmann::json params) { - std::unordered_set processed; - for (const auto& dir : dirs) { - if (processed.find(dir) == processed.end() && !dir_exists(dir)) { - if (create_dir(dir)) { - log_info("mkdir '%s'", dir.c_str()); + std::string label = ""; + bool label_empty = true; + for (auto& [key, val] : params.items()) { + if (val.is_number()) { + state.counters.insert({{key, val}}); + } else if (val.is_boolean()) { + state.counters.insert({{key, val ? 
1.0 : 0.0}}); + } else { + auto kv = key + "=" + val.dump(); + if (label_empty) { + label = kv; } else { - log_error("fail to create output directory '%s'", dir.c_str()); - // won't create any other dir when problem occurs - return false; + label += "#" + kv; } + label_empty = false; } - processed.insert(dir); } - return true; + if (!label_empty) { state.SetLabel(label); } } -inline bool check(const std::vector& indices, - const bool build_mode, - const bool force_overwrite) +template +void bench_build(::benchmark::State& state, + std::shared_ptr> dataset, + Configuration::Index index, + bool force_overwrite) { - std::vector files_should_exist; - std::vector dirs_should_exist; - std::vector output_files; - for (const auto& index : indices) { - if (build_mode) { - output_files.push_back(index.file); - output_files.push_back(index.file + ".txt"); - - const auto pos = index.file.rfind('/'); - if (pos != std::string::npos) { dirs_should_exist.push_back(index.file.substr(0, pos)); } + if (file_exists(index.file)) { + if (force_overwrite) { + log_info("Overwriting file: %s", index.file.c_str()); } else { - files_should_exist.push_back(index.file); - files_should_exist.push_back(index.file + ".txt"); + return state.SkipWithMessage( + "Index file already exists (use --overwrite to overwrite the index)."); + } + } - output_files.push_back(index.search_result_file + ".0.ibin"); - output_files.push_back(index.search_result_file + ".0.txt"); + std::unique_ptr> algo; + try { + algo = ann::create_algo( + index.algo, dataset->distance(), dataset->dim(), index.build_param, index.dev_list); + } catch (const std::exception& e) { + return state.SkipWithError("Failed to create an algo: " + std::string(e.what())); + } - const auto pos = index.search_result_file.rfind('/'); - if (pos != std::string::npos) { - dirs_should_exist.push_back(index.search_result_file.substr(0, pos)); + const auto algo_property = algo->get_property(); + + const T* base_set = dataset->base_set(algo_property.dataset_memory_type); + std::size_t index_size = dataset->base_set_size(); + + cuda_timer gpu_timer; + { + nvtx_case nvtx{state.name()}; + for (auto _ : state) { + auto ntx_lap = nvtx.lap(); + auto gpu_lap = gpu_timer.lap(); + try { + algo->build(base_set, index_size, gpu_timer.stream()); + } catch (const std::exception& e) { + state.SkipWithError(std::string(e.what())); } } } + state.counters.insert( + {{"GPU Time", gpu_timer.total_time() / state.iterations()}, {"index_size", index_size}}); + dump_parameters(state, index.build_param); - bool ret = true; - if (!check_file_exist(files_should_exist)) { ret = false; } - if (!check_file_not_exist(output_files, force_overwrite)) { ret = false; } - if (!check_no_duplicate_file(output_files)) { ret = false; } - if (ret && !mkdir(dirs_should_exist)) { ret = false; } - return ret; -} - -inline void write_build_info(const std::string& file_prefix, - const std::string& dataset, - const std::string& distance, - const std::string& name, - const std::string& algo, - const std::string& build_param, - const float build_time) -{ - std::ofstream ofs(file_prefix + ".txt"); - if (!ofs) { throw std::runtime_error("can't open build info file: " + file_prefix + ".txt"); } - ofs << "dataset: " << dataset << "\n" - << "distance: " << distance << "\n" - << "\n" - << "name: " << name << "\n" - << "algo: " << algo << "\n" - << "build_param: " << build_param << "\n" - << "build_time: " << build_time << endl; - ofs.close(); - if (!ofs) { throw std::runtime_error("can't write to build info file: " + file_prefix + 
".txt"); } + if (state.skipped()) { return; } + make_sure_parent_dir_exists(index.file); + algo->save(index.file); } template -void build(const Dataset* dataset, const std::vector& indices) +void bench_search(::benchmark::State& state, + std::shared_ptr> dataset, + Configuration::Index index, + std::size_t search_param_ix) { - cudaStream_t stream; - RAFT_CUDA_TRY(cudaStreamCreate(&stream)); - - log_info( - "base set from dataset '%s', #vector = %zu", dataset->name().c_str(), dataset->base_set_size()); - - for (const auto& index : indices) { - log_info("creating algo '%s', param=%s", index.algo.c_str(), index.build_param.dump().c_str()); - const auto algo = create_algo(index.algo, - dataset->distance(), - dataset->dim(), - index.refine_ratio, - index.build_param, - index.dev_list); - const auto algo_property = algo->get_property(); - - const T* base_set_ptr = nullptr; - if (algo_property.dataset_memory_type == MemoryType::Host) { - log_info("%s", "loading base set to memory"); - base_set_ptr = dataset->base_set(); - } else if (algo_property.dataset_memory_type == MemoryType::HostMmap) { - log_info("%s", "mapping base set to memory"); - base_set_ptr = dataset->mapped_base_set(); - } else if (algo_property.dataset_memory_type == MemoryType::Device) { - log_info("%s", "loading base set to GPU"); - base_set_ptr = dataset->base_set_on_gpu(); + const auto& sp_json = index.search_params[search_param_ix]; + + // NB: `k` and `n_queries` are guaranteed to be populated in conf.cpp + const std::uint32_t k = sp_json["k"]; + // Amount of data processes in one go + const std::size_t n_queries = sp_json["n_queries"]; + // Round down the query data to a multiple of the batch size to loop over full batches of data + const std::size_t query_set_size = (dataset->query_set_size() / n_queries) * n_queries; + + if (!file_exists(index.file)) { + state.SkipWithError("Index file is missing. 
Run the benchmark in the build mode first."); + return; + } + // algo is static to cache it between close search runs to save time on index loading + static std::string index_file = ""; + if (index.file != index_file) { + current_algo.reset(); + index_file = index.file; + } + ANN* algo; + std::unique_ptr::AnnSearchParam> search_param; + try { + if (!current_algo || (algo = dynamic_cast*>(current_algo.get())) == nullptr) { + auto ualgo = ann::create_algo( + index.algo, dataset->distance(), dataset->dim(), index.build_param, index.dev_list); + algo = ualgo.get(); + algo->load(index_file); + current_algo = std::move(ualgo); } + search_param = ann::create_search_param(index.algo, sp_json); + } catch (const std::exception& e) { + return state.SkipWithError("Failed to create an algo: " + std::string(e.what())); + } + algo->set_search_param(*search_param); + + const auto algo_property = algo->get_property(); + const T* query_set = dataset->query_set(algo_property.query_memory_type); + buf distances{algo_property.query_memory_type, k * query_set_size}; + buf neighbors{algo_property.query_memory_type, k * query_set_size}; + + if (search_param->needs_dataset()) { + try { + algo->set_search_dataset(dataset->base_set(algo_property.dataset_memory_type), + dataset->base_set_size()); + } catch (const std::exception&) { + state.SkipWithError("The algorithm '" + index.name + + "' requires the base set, but it's not available."); + return; + } + } - log_info("building index '%s'", index.name.c_str()); - RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); -#ifdef NVTX - nvtxRangePush("build"); -#endif - Timer timer; - algo->build(base_set_ptr, dataset->base_set_size(), stream); - RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); - const float elapsed_ms = timer.elapsed_ms(); -#ifdef NVTX - nvtxRangePop(); -#endif - log_info("built index in %.2f seconds", elapsed_ms / 1000.0f); - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - RAFT_CUDA_TRY(cudaPeekAtLastError()); - - algo->save(index.file); - write_build_info(index.file, - dataset->name(), - dataset->distance(), - index.name, - index.algo, - index.build_param.dump(), - elapsed_ms / 1000.0f); - log_info("saved index to %s", index.file.c_str()); + std::ptrdiff_t batch_offset = 0; + std::size_t queries_processed = 0; + cuda_timer gpu_timer; + { + nvtx_case nvtx{state.name()}; + for (auto _ : state) { + // measure the GPU time using the RAII helper + auto ntx_lap = nvtx.lap(); + auto gpu_lap = gpu_timer.lap(); + // run the search + try { + algo->search(query_set + batch_offset * dataset->dim(), + n_queries, + k, + neighbors.data + batch_offset * k, + distances.data + batch_offset * k, + gpu_timer.stream()); + } catch (const std::exception& e) { + state.SkipWithError(std::string(e.what())); + } + // advance to the next batch + batch_offset = (batch_offset + n_queries) % query_set_size; + queries_processed += n_queries; + } + } + state.SetItemsProcessed(queries_processed); + state.counters.insert({{"k", k}, {"n_queries", n_queries}}); + if (cudart.found()) { + state.counters.insert({{"GPU Time", gpu_timer.total_time() / state.iterations()}, + {"GPU QPS", queries_processed / gpu_timer.total_time()}}); + } + dump_parameters(state, sp_json); + if (state.skipped()) { return; } + + // evaluate recall + if (dataset->max_k() >= k) { + const std::int32_t* gt = dataset->gt_set(); + const std::uint32_t max_k = dataset->max_k(); + buf neighbors_host = neighbors.move(MemoryType::Host); + + std::size_t rows = std::min(queries_processed, query_set_size); + std::size_t match_count = 0; + 
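The search loop above cycles `batch_offset` through the query set so that each gbench iteration works on a different full batch. A standalone sketch of the wrap-around arithmetic with made-up sizes:

#include <cstddef>
#include <cstdio>

int main()
{
  const std::size_t total_queries  = 10000;
  const std::size_t n_queries      = 3000;  // batch size
  // Round down to full batches only: 9000 queries are used here.
  const std::size_t query_set_size = (total_queries / n_queries) * n_queries;

  std::size_t batch_offset = 0;
  for (int iter = 0; iter < 6; iter++) {
    std::printf("iteration %d searches queries [%zu, %zu)\n",
                iter, batch_offset, batch_offset + n_queries);
    batch_offset = (batch_offset + n_queries) % query_set_size;  // wrap around
  }
  return 0;
}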
std::size_t total_count = rows * static_cast(k); + for (std::size_t i = 0; i < rows; i++) { + for (std::uint32_t j = 0; j < k; j++) { + auto act_idx = std::int32_t(neighbors_host.data[i * k + j]); + for (std::uint32_t l = 0; l < k; l++) { + auto exp_idx = gt[i * max_k + l]; + if (act_idx == exp_idx) { + match_count++; + break; + } + } + } + } + double actual_recall = static_cast(match_count) / static_cast(total_count); + state.counters.insert({{"Recall", actual_recall}}); } +} - RAFT_CUDA_TRY(cudaStreamDestroy(stream)); +inline void printf_usage() +{ + ::benchmark::PrintDefaultHelp(); + fprintf( + stdout, + " [--build|--search] \n" + " [--overwrite]\n" + " [--data_prefix=]\n" + " [--index_prefix=]\n" + " [--override_kv=]\n" + " .json\n" + "\n" + "Note the non-standard benchmark parameters:\n" + " --build: build mode, will build index\n" + " --search: search mode, will search using the built index\n" + " one and only one of --build and --search should be specified\n" + " --overwrite: force overwriting existing index files\n" + " --data_prefix=:" + " prepend to dataset file paths specified in the .json (default = 'data/').\n" + " --index_prefix=:" + " prepend to index file paths specified in the .json (default = 'index/').\n" + " --override_kv=:" + " override a build/search key one or more times multiplying the number of configurations;" + " you can use this parameter multiple times to get the Cartesian product of benchmark" + " configs.\n"); } -inline void write_search_result(const std::string& file_prefix, - const std::string& dataset, - const std::string& distance, - const std::string& name, - const std::string& algo, - const std::string& build_param, - const std::string& search_param, - std::size_t batch_size, - unsigned run_count, - unsigned k, - float search_time_average, - float search_time_p99, - float search_time_p999, - float query_per_second, - const int* neighbors, - size_t query_set_size) +template +void register_build(std::shared_ptr> dataset, + std::vector indices, + bool force_overwrite) { - log_info("throughput : %e [QPS]", query_per_second); - std::ofstream ofs(file_prefix + ".txt"); - if (!ofs) { throw std::runtime_error("can't open search result file: " + file_prefix + ".txt"); } - ofs << "dataset: " << dataset << "\n" - << "distance: " << distance << "\n" - << "\n" - << "name: " << name << "\n" - << "algo: " << algo << "\n" - << "build_param: " << build_param << "\n" - << "search_param: " << search_param << "\n" - << "\n" - << "batch_size: " << batch_size << "\n" - << "run_count: " << run_count << "\n" - << "k: " << k << "\n" - << "query_per_second: " << query_per_second << "\n" - << "average_search_time: " << search_time_average << endl; - - if (search_time_p99 != std::numeric_limits::max()) { - ofs << "p99_search_time: " << search_time_p99 << endl; - } - if (search_time_p999 != std::numeric_limits::max()) { - ofs << "p999_search_time: " << search_time_p999 << endl; + for (auto index : indices) { + auto suf = static_cast(index.build_param["override_suffix"]); + auto file_suf = suf; + index.build_param.erase("override_suffix"); + std::replace(file_suf.begin(), file_suf.end(), '/', '-'); + index.file += file_suf; + auto* b = ::benchmark::RegisterBenchmark( + index.name + suf, bench_build, dataset, index, force_overwrite); + b->Unit(benchmark::kSecond); + b->UseRealTime(); } +} - ofs.close(); - if (!ofs) { - throw std::runtime_error("can't write to search result file: " + file_prefix + ".txt"); +template +void register_search(std::shared_ptr> dataset, + std::vector 
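The recall computation above compares each returned neighbor against the first `k` ground-truth entries of its query row. The same logic as a tiny self-contained function with toy data; the name `recall_at_k` is mine, not part of the patch:

#include <cstdint>
#include <cstdio>
#include <vector>

// Recall@k: for each query, count how many of the k returned neighbors appear
// among the first k ground-truth neighbors (ground-truth rows have max_k entries).
double recall_at_k(const std::vector<std::int32_t>& neighbors,  // rows * k
                   const std::vector<std::int32_t>& gt,         // rows * max_k
                   std::size_t rows, std::uint32_t k, std::uint32_t max_k)
{
  std::size_t match_count = 0;
  for (std::size_t i = 0; i < rows; i++) {
    for (std::uint32_t j = 0; j < k; j++) {
      auto act = neighbors[i * k + j];
      for (std::uint32_t l = 0; l < k; l++) {
        if (act == gt[i * max_k + l]) { match_count++; break; }
      }
    }
  }
  return static_cast<double>(match_count) / static_cast<double>(rows * k);
}

int main()
{
  std::vector<std::int32_t> nn{0, 1, 2, 9};  // two queries, k = 2
  std::vector<std::int32_t> gt{0, 1, 2, 3};  // max_k = 2
  std::printf("recall = %.2f\n", recall_at_k(nn, gt, 2, 2, 2));  // prints 0.75
  return 0;
}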
indices) +{ + for (auto index : indices) { + for (std::size_t i = 0; i < index.search_params.size(); i++) { + auto suf = static_cast(index.search_params[i]["override_suffix"]); + index.search_params[i].erase("override_suffix"); + auto* b = + ::benchmark::RegisterBenchmark(index.name + suf, bench_search, dataset, index, i); + b->Unit(benchmark::kMillisecond); + b->UseRealTime(); + } } - - BinFile neighbors_file(file_prefix + ".ibin", "w"); - neighbors_file.write(neighbors, query_set_size, k); } template -inline void search(const Dataset* dataset, const std::vector& indices) +void dispatch_benchmark(const Configuration& conf, + bool force_overwrite, + bool build_mode, + bool search_mode, + std::string data_prefix, + std::string index_prefix, + kv_series override_kv) { - if (indices.empty()) { return; } - cudaStream_t stream; - RAFT_CUDA_TRY(cudaStreamCreate(&stream)); - - log_info("loading query set from dataset '%s', #vector = %zu", - dataset->name().c_str(), - dataset->query_set_size()); - const T* const query_set = dataset->query_set(); - // query set is usually much smaller than base set, so load it eagerly - const T* const d_query_set = dataset->query_set_on_gpu(); - const size_t query_set_size = dataset->query_set_size(); - - // currently all indices has same batch_size, k and run_count - const std::size_t batch_size = indices[0].batch_size; - const unsigned k = indices[0].k; - const unsigned run_count = indices[0].run_count; - log_info( - "basic search parameters: batch_size = %d, k = %d, run_count = %d", batch_size, k, run_count); - if (query_set_size % batch_size != 0) { - log_warn("query set size (%zu) % batch size (%d) != 0, the size of last batch is %zu", - query_set_size, - batch_size, - query_set_size % batch_size); + if (cudart.found()) { + for (auto [key, value] : cuda_info()) { + ::benchmark::AddCustomContext(key, value); + } } - const std::size_t num_batches = (query_set_size - 1) / batch_size + 1; - std::size_t* const neighbors = new std::size_t[query_set_size * k]; - int* const neighbors_buf = new int[query_set_size * k]; - float* const distances = new float[query_set_size * k]; - std::vector search_times; - search_times.reserve(num_batches); - std::size_t* d_neighbors; - float* d_distances; - RAFT_CUDA_TRY(cudaMalloc((void**)&d_neighbors, query_set_size * k * sizeof(*d_neighbors))); - RAFT_CUDA_TRY(cudaMalloc((void**)&d_distances, query_set_size * k * sizeof(*d_distances))); - - for (const auto& index : indices) { - log_info("creating algo '%s', param=%s", index.algo.c_str(), index.build_param.dump().c_str()); - const auto algo = create_algo(index.algo, - dataset->distance(), - dataset->dim(), - index.refine_ratio, - index.build_param, - index.dev_list); - const auto algo_property = algo->get_property(); - - log_info("loading index '%s' from file '%s'", index.name.c_str(), index.file.c_str()); - algo->load(index.file); - - const T* this_query_set = query_set; - std::size_t* this_neighbors = neighbors; - float* this_distances = distances; - if (algo_property.query_memory_type == MemoryType::Device) { - this_query_set = d_query_set; - this_neighbors = d_neighbors; - this_distances = d_distances; + const auto dataset_conf = conf.get_dataset_conf(); + auto base_file = combine_path(data_prefix, dataset_conf.base_file); + auto query_file = combine_path(data_prefix, dataset_conf.query_file); + auto gt_file = dataset_conf.groundtruth_neighbors_file; + if (gt_file.has_value()) { gt_file.emplace(combine_path(data_prefix, gt_file.value())); } + auto dataset = 
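`register_build`/`register_search` create one gbench case per index and per search-parameter set at run time via `::benchmark::RegisterBenchmark`. A minimal sketch of run-time registration with bound arguments; the case names and the sleep-based body are illustrative:

#include <benchmark/benchmark.h>

#include <chrono>
#include <string>
#include <thread>

// Illustrative benchmark body; the extra parameters are bound at registration time.
static void bench_case(benchmark::State& state, std::string index_file, int search_param_ix)
{
  for (auto _ : state) {
    std::this_thread::sleep_for(std::chrono::microseconds(100 * (search_param_ix + 1)));
  }
  state.SetLabel(index_file);
}

int main(int argc, char** argv)
{
  for (int i = 0; i < 3; i++) {
    auto name = "hypothetical_index/param_" + std::to_string(i);
    auto* b = benchmark::RegisterBenchmark(name.c_str(), bench_case, std::string("index.bin"), i);
    b->Unit(benchmark::kMillisecond)->UseRealTime();
  }
  benchmark::Initialize(&argc, argv);
  benchmark::RunSpecifiedBenchmarks();
  benchmark::Shutdown();
  return 0;
}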
std::make_shared>(dataset_conf.name, + base_file, + dataset_conf.subset_first_row, + dataset_conf.subset_size, + query_file, + dataset_conf.distance, + gt_file); + ::benchmark::AddCustomContext("dataset", dataset_conf.name); + ::benchmark::AddCustomContext("distance", dataset_conf.distance); + std::vector indices = conf.get_indices(); + if (build_mode) { + if (file_exists(base_file)) { + log_info("Using the dataset file '%s'", base_file.c_str()); + ::benchmark::AddCustomContext("n_records", std::to_string(dataset->base_set_size())); + ::benchmark::AddCustomContext("dim", std::to_string(dataset->dim())); + } else { + log_warn("Dataset file '%s' does not exist; benchmarking index building is impossible.", + base_file.c_str()); } - - if (algo_property.need_dataset_when_search) { - log_info("loading base set from dataset '%s', #vector = %zu", - dataset->name().c_str(), - dataset->base_set_size()); - const T* base_set_ptr = nullptr; - if (algo_property.dataset_memory_type == MemoryType::Host) { - log_info("%s", "loading base set to memory"); - base_set_ptr = dataset->base_set(); - } else if (algo_property.dataset_memory_type == MemoryType::HostMmap) { - log_info("%s", "mapping base set to memory"); - base_set_ptr = dataset->mapped_base_set(); - } else if (algo_property.dataset_memory_type == MemoryType::Device) { - log_info("%s", "loading base set to GPU"); - base_set_ptr = dataset->base_set_on_gpu(); + std::vector more_indices{}; + for (auto& index : indices) { + for (auto param : apply_overrides(index.build_param, override_kv)) { + auto modified_index = index; + modified_index.build_param = param; + modified_index.file = combine_path(index_prefix, modified_index.file); + more_indices.push_back(modified_index); } - algo->set_search_dataset(base_set_ptr, dataset->base_set_size()); } - - for (int i = 0, end_i = index.search_params.size(); i != end_i; ++i) { - const auto p_param = create_search_param(index.algo, index.search_params[i]); - algo->set_search_param(*p_param); - log_info("search with param: %s", index.search_params[i].dump().c_str()); - - if (algo_property.query_memory_type == MemoryType::Device) { - RAFT_CUDA_TRY(cudaMemset(d_neighbors, 0, query_set_size * k * sizeof(*d_neighbors))); - RAFT_CUDA_TRY(cudaMemset(d_distances, 0, query_set_size * k * sizeof(*d_distances))); - } else { - memset(neighbors, 0, query_set_size * k * sizeof(*neighbors)); - memset(distances, 0, query_set_size * k * sizeof(*distances)); - } - - float best_search_time_average = std::numeric_limits::max(); - float best_search_time_p99 = std::numeric_limits::max(); - float best_search_time_p999 = std::numeric_limits::max(); - float total_search_time = 0; - for (unsigned run = 0; run < run_count; ++run) { - log_info("run %d / %d", run + 1, run_count); - for (std::size_t batch_id = 0; batch_id < num_batches; ++batch_id) { - const std::size_t row = batch_id * batch_size; - const std::size_t actual_batch_size = - (batch_id == num_batches - 1) ? 
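`apply_overrides` expands every `--override_kv` key into the Cartesian product of configurations; its body is not visible in this hunk. The sketch below only illustrates how such an expansion could look with `nlohmann::json`: the `expand_overrides` name is mine, and the `kv_series` alias shown here (a key plus a list of JSON values) is an assumption about how `run_main` fills it, not the patch's real definition:

#include <nlohmann/json.hpp>

#include <iostream>
#include <string>
#include <utility>
#include <vector>

using kv_series = std::vector<std::pair<std::string, std::vector<nlohmann::json>>>;

// Sketch: produce the Cartesian product of `base` with every overridden value.
std::vector<nlohmann::json> expand_overrides(const nlohmann::json& base, const kv_series& overrides)
{
  std::vector<nlohmann::json> out{base};
  for (const auto& [key, values] : overrides) {
    std::vector<nlohmann::json> next;
    for (const auto& cfg : out) {
      for (const auto& v : values) {
        auto c = cfg;
        c[key] = v;
        next.push_back(std::move(c));
      }
    }
    out = std::move(next);
  }
  return out;
}

int main()
{
  nlohmann::json base{{"nlist", 1000}, {"niter", 20}};
  kv_series overrides{{"nlist", {5000, 10000}}, {"ratio", {10}}};
  for (const auto& cfg : expand_overrides(base, overrides)) {
    std::cout << cfg.dump() << "\n";  // two configs: nlist=5000/10000, each with ratio=10
  }
  return 0;
}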
query_set_size - row : batch_size; - RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); -#ifdef NVTX - string nvtx_label = "batch" + to_string(batch_id); - if (run_count != 1) { nvtx_label = "run" + to_string(run) + "-" + nvtx_label; } - if (batch_id == 10) { - run = run_count - 1; - break; - } -#endif - Timer timer; -#ifdef NVTX - nvtxRangePush(nvtx_label.c_str()); -#endif - algo->search(this_query_set + row * dataset->dim(), - actual_batch_size, - k, - this_neighbors + row * k, - this_distances + row * k, - stream); - RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); - const float elapsed_ms = timer.elapsed_ms(); -#ifdef NVTX - nvtxRangePop(); -#endif - // If the size of the last batch is less than batch_size, don't count it for - // search time. But neighbors of the last batch will still be filled, so it's - // counted for recall calculation. - if (actual_batch_size == batch_size) { - search_times.push_back(elapsed_ms / 1000.0f); // in seconds - } - } - - const float total_search_time_run = - std::accumulate(search_times.cbegin(), search_times.cend(), 0.0f); - const float search_time_average = total_search_time_run / search_times.size(); - total_search_time += total_search_time_run; - best_search_time_average = std::min(best_search_time_average, search_time_average); - - if (search_times.size() >= 100) { - std::sort(search_times.begin(), search_times.end()); - - const auto calc_percentile_pos = [](float percentile, size_t N) { - return static_cast(std::ceil(percentile / 100.0 * N)) - 1; - }; - - const float search_time_p99 = search_times[calc_percentile_pos(99, search_times.size())]; - best_search_time_p99 = std::min(best_search_time_p99, search_time_p99); - - if (search_times.size() >= 1000) { - const float search_time_p999 = - search_times[calc_percentile_pos(99.9, search_times.size())]; - best_search_time_p999 = std::min(best_search_time_p999, search_time_p999); - } + register_build(dataset, more_indices, force_overwrite); + } else if (search_mode) { + if (file_exists(query_file)) { + log_info("Using the query file '%s'", query_file.c_str()); + ::benchmark::AddCustomContext("max_n_queries", std::to_string(dataset->query_set_size())); + ::benchmark::AddCustomContext("dim", std::to_string(dataset->dim())); + if (gt_file.has_value()) { + if (file_exists(*gt_file)) { + log_info("Using the ground truth file '%s'", gt_file->c_str()); + ::benchmark::AddCustomContext("max_k", std::to_string(dataset->max_k())); + } else { + log_warn("Ground truth file '%s' does not exist; the recall won't be reported.", + gt_file->c_str()); } - search_times.clear(); - } - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - RAFT_CUDA_TRY(cudaPeekAtLastError()); - const auto query_per_second = - (run_count * raft::round_down_safe(query_set_size, batch_size)) / total_search_time; - - if (algo_property.query_memory_type == MemoryType::Device) { - RAFT_CUDA_TRY(cudaMemcpy(neighbors, - d_neighbors, - query_set_size * k * sizeof(*d_neighbors), - cudaMemcpyDeviceToHost)); - RAFT_CUDA_TRY(cudaMemcpy(distances, - d_distances, - query_set_size * k * sizeof(*d_distances), - cudaMemcpyDeviceToHost)); - } - - for (std::size_t j = 0; j < query_set_size * k; ++j) { - neighbors_buf[j] = neighbors[j]; + } else { + log_warn( + "Ground truth file is not provided; the recall won't be reported. NB: use " + "the 'groundtruth_neighbors_file' alongside the 'query_file' key to specify the path to " + "the ground truth in your conf.json."); } - write_search_result(index.search_result_file + "." 
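The hand-rolled p99/p999 bookkeeping removed above can be approximated with gbench's own repetition statistics. A sketch; the statistic is computed across repetitions rather than across individual batches, so it is only a rough substitute, and the `percentile99` helper is mine:

#include <benchmark/benchmark.h>

#include <algorithm>
#include <chrono>
#include <cmath>
#include <thread>
#include <vector>

// Percentile over per-repetition results (same formula as the removed code).
static double percentile99(const std::vector<double>& v)
{
  std::vector<double> s(v);
  std::sort(s.begin(), s.end());
  auto pos = static_cast<std::size_t>(std::ceil(0.99 * s.size())) - 1;
  return s[pos];
}

static void fake_search(benchmark::State& state)
{
  for (auto _ : state) { std::this_thread::sleep_for(std::chrono::microseconds(200)); }
}
BENCHMARK(fake_search)
  ->Unit(benchmark::kMillisecond)
  ->Repetitions(100)  // aggregates and custom statistics are computed across repetitions
  ->ComputeStatistics("p99", percentile99)
  ->ReportAggregatesOnly(true);
BENCHMARK_MAIN();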
+ to_string(i), - dataset->name(), - dataset->distance(), - index.name, - index.algo, - index.build_param.dump(), - index.search_params[i].dump(), - batch_size, - index.run_count, - k, - best_search_time_average, - best_search_time_p99, - best_search_time_p999, - query_per_second, - neighbors_buf, - query_set_size); + } else { + log_warn("Query file '%s' does not exist; benchmarking search is impossible.", + query_file.c_str()); } - - log_info("finish searching for index '%s'", index.name.c_str()); + for (auto& index : indices) { + index.search_params = apply_overrides(index.search_params, override_kv); + index.file = combine_path(index_prefix, index.file); + } + register_search(dataset, indices); } - - delete[] neighbors; - delete[] neighbors_buf; - delete[] distances; - RAFT_CUDA_TRY(cudaFree(d_neighbors)); - RAFT_CUDA_TRY(cudaFree(d_distances)); - RAFT_CUDA_TRY(cudaStreamDestroy(stream)); } -inline const std::string usage(const string& argv0) +inline auto parse_bool_flag(const char* arg, const char* pat, bool& result) -> bool { - return "usage: " + argv0 + " -b|s [-c] [-f] [-i index_names] conf.json\n" + - " -b: build mode, will build index\n" + - " -s: search mode, will search using built index\n" + - " one and only one of -b and -s should be specified\n" + - " -c: just check command line options and conf.json are sensible\n" + - " won't build or search\n" + " -f: force overwriting existing output files\n" + - " -i: by default will build/search all the indices found in conf.json\n" + - " '-i' can be used to select a subset of indices\n" + - " 'index_names' is a list of comma-separated index names\n" + - " '*' is allowed as the last character of a name to select all matched indices\n" + - " for example, -i \"hnsw1,hnsw2,faiss\" or -i \"hnsw*,faiss\""; + if (strcmp(arg, pat) == 0) { + result = true; + return true; + } + return false; } -template -inline int dispatch_benchmark(const Configuration& conf, - const std::string& index_patterns, - bool force_overwrite, - bool only_check, - bool build_mode, - bool search_mode) +inline auto parse_string_flag(const char* arg, const char* pat, std::string& result) -> bool { - try { - const auto dataset_conf = conf.get_dataset_conf(); - - BinDataset dataset(dataset_conf.name, - dataset_conf.base_file, - dataset_conf.subset_first_row, - dataset_conf.subset_size, - dataset_conf.query_file, - dataset_conf.distance); - - vector indices = conf.get_indices(index_patterns); - if (!check(indices, build_mode, force_overwrite)) { return -1; } - - std::string message = "will "; - message += build_mode ? 
"build:" : "search:"; - for (const auto& index : indices) { - message += "\n " + index.name; - } - log_info("%s", message.c_str()); - - if (only_check) { - log_info("%s", "all check passed, quit due to option -c"); - return 0; - } - - if (build_mode) { - build(&dataset, indices); - } else if (search_mode) { - search(&dataset, indices); - } - } catch (const std::exception& e) { - log_error("exception occurred: %s", e.what()); - return -1; + auto n = strlen(pat); + if (strncmp(pat, arg, strlen(pat)) == 0) { + result = arg + n + 1; + return true; } - - return 0; + return false; } -inline int run_main(int argc, char** argv) +inline auto run_main(int argc, char** argv) -> int { - bool force_overwrite = false; - bool build_mode = false; - bool search_mode = false; - bool only_check = false; - std::string index_patterns("*"); - - int opt; - while ((opt = getopt(argc, argv, "bscfi:h")) != -1) { - switch (opt) { - case 'b': build_mode = true; break; - case 's': search_mode = true; break; - case 'c': only_check = true; break; - case 'f': force_overwrite = true; break; - case 'i': index_patterns = optarg; break; - case 'h': cout << usage(argv[0]) << endl; return -1; - default: cerr << "\n" << usage(argv[0]) << endl; return -1; - } + bool force_overwrite = false; + bool build_mode = false; + bool search_mode = false; + std::string data_prefix = "data"; + std::string index_prefix = "index"; + std::string new_override_kv = ""; + kv_series override_kv{}; + + char arg0_default[] = "benchmark"; // NOLINT + char* args_default = arg0_default; + if (!argv) { + argc = 1; + argv = &args_default; } - if (build_mode == search_mode) { - std::cerr << "one and only one of -b and -s should be specified\n\n" << usage(argv[0]) << endl; + if (argc == 1) { + printf_usage(); return -1; } - if (argc - optind != 1) { - std::cerr << usage(argv[0]) << endl; + + char* conf_path = argv[--argc]; + std::ifstream conf_stream(conf_path); + + for (int i = 1; i < argc; i++) { + if (parse_bool_flag(argv[i], "--overwrite", force_overwrite) || + parse_bool_flag(argv[i], "--build", build_mode) || + parse_bool_flag(argv[i], "--search", search_mode) || + parse_string_flag(argv[i], "--data_prefix", data_prefix) || + parse_string_flag(argv[i], "--index_prefix", index_prefix) || + parse_string_flag(argv[i], "--override_kv", new_override_kv)) { + if (!new_override_kv.empty()) { + auto kvv = split(new_override_kv, ':'); + auto key = kvv[0]; + std::vector vals{}; + for (std::size_t j = 1; j < kvv.size(); j++) { + vals.push_back(nlohmann::json::parse(kvv[j])); + } + override_kv.emplace_back(key, vals); + new_override_kv = ""; + } + for (int j = i; j < argc - 1; j++) { + argv[j] = argv[j + 1]; + } + argc--; + i--; + } + } + + if (build_mode == search_mode) { + log_error("One and only one of --build and --search should be specified"); + printf_usage(); return -1; } - string conf_file = argv[optind]; - std::ifstream conf_stream(conf_file.c_str()); if (!conf_stream) { - log_error("can't open configuration file: %s", argv[optind]); + log_error("Can't open configuration file: %s", conf_path); return -1; } - try { - Configuration conf(conf_stream); - std::string dtype = conf.get_dataset_conf().dtype; - - if (dtype == "float") { - return dispatch_benchmark( - conf, index_patterns, force_overwrite, only_check, build_mode, search_mode); - } else if (dtype == "uint8") { - return dispatch_benchmark( - conf, index_patterns, force_overwrite, only_check, build_mode, search_mode); - } else if (dtype == "int8") { - return dispatch_benchmark( - conf, 
index_patterns, force_overwrite, only_check, build_mode, search_mode); - } else { - log_error("datatype %s not supported", dtype); - } - - } catch (const std::exception& e) { - log_error("exception occurred: %s", e.what()); + if (!cudart.found()) { log_warn("cudart library is not found, GPU-based indices won't work."); } + + Configuration conf(conf_stream); + std::string dtype = conf.get_dataset_conf().dtype; + + if (dtype == "float") { + dispatch_benchmark( + conf, force_overwrite, build_mode, search_mode, data_prefix, index_prefix, override_kv); + } else if (dtype == "uint8") { + dispatch_benchmark( + conf, force_overwrite, build_mode, search_mode, data_prefix, index_prefix, override_kv); + } else if (dtype == "int8") { + dispatch_benchmark( + conf, force_overwrite, build_mode, search_mode, data_prefix, index_prefix, override_kv); + } else { + log_error("datatype '%s' is not supported", dtype.c_str()); return -1; } - return -1; + ::benchmark::Initialize(&argc, argv, printf_usage); + if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return -1; + ::benchmark::RunSpecifiedBenchmarks(); + ::benchmark::Shutdown(); + // Release a possibly cached ANN object, so that it cannot be alive longer than the handle to a + // shared library it depends on (dynamic benchmark executable). + current_algo.reset(); + return 0; } + }; // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/common/benchmark_util.hpp b/cpp/bench/ann/src/common/benchmark_util.hpp deleted file mode 100644 index 7005883ffc..0000000000 --- a/cpp/bench/ann/src/common/benchmark_util.hpp +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include "ann_types.hpp" -#include - -namespace raft::bench::ann { - -inline Metric parse_metric(const std::string& metric_str) -{ - if (metric_str == "inner_product") { - return raft::bench::ann::Metric::kInnerProduct; - } else if (metric_str == "euclidean") { - return raft::bench::ann::Metric::kEuclidean; - } else { - throw std::runtime_error("invalid metric: '" + metric_str + "'"); - } -} -}; // namespace raft::bench::ann \ No newline at end of file diff --git a/cpp/bench/ann/src/common/conf.cpp b/cpp/bench/ann/src/common/conf.cpp deleted file mode 100644 index d180f37973..0000000000 --- a/cpp/bench/ann/src/common/conf.cpp +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
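The `dtype` string from the configuration picks the template instantiation at run time. A bare-bones sketch of that dispatch shape; `process` and the dataset name are placeholders:

#include <cstdint>
#include <cstdio>
#include <stdexcept>
#include <string>
#include <typeinfo>

template <typename T>
void process(const std::string& dataset)
{
  std::printf("processing '%s' with %zu-byte elements (%s)\n",
              dataset.c_str(), sizeof(T), typeid(T).name());
}

void dispatch(const std::string& dtype, const std::string& dataset)
{
  if (dtype == "float") {
    process<float>(dataset);
  } else if (dtype == "uint8") {
    process<std::uint8_t>(dataset);
  } else if (dtype == "int8") {
    process<std::int8_t>(dataset);
  } else {
    throw std::invalid_argument("datatype '" + dtype + "' is not supported");
  }
}

int main()
{
  dispatch("uint8", "bigann-100M");
  return 0;
}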
- */ -#include "conf.h" - -#include -#include -#include -#include -#include - -#include "util.h" - -namespace raft::bench::ann { -using std::runtime_error; -using std::string; -using std::unordered_set; -using std::vector; - -Configuration::Configuration(std::istream& conf_stream) -{ - // to enable comments in json - auto conf = nlohmann::json::parse(conf_stream, nullptr, true, true); - - parse_dataset_(conf.at("dataset")); - parse_index_(conf.at("index"), conf.at("search_basic_param")); -} - -vector Configuration::get_indices(const string& patterns) const -{ - vector names; - for (const auto& index : indices_) { - names.push_back(index.name); - } - - auto matched = match_(names, patterns); - if (matched.empty()) { throw runtime_error("no available index matches '" + patterns + "'"); } - - vector res; - for (const auto& index : indices_) { - if (matched.find(index.name) != matched.end()) { res.push_back(index); } - } - return res; -} - -void Configuration::parse_dataset_(const nlohmann::json& conf) -{ - dataset_conf_.name = conf.at("name"); - dataset_conf_.base_file = conf.at("base_file"); - dataset_conf_.query_file = conf.at("query_file"); - dataset_conf_.distance = conf.at("distance"); - - if (conf.contains("subset_first_row")) { - dataset_conf_.subset_first_row = conf.at("subset_first_row"); - } - if (conf.contains("subset_size")) { dataset_conf_.subset_size = conf.at("subset_size"); } - - if (conf.contains("dtype")) { - dataset_conf_.dtype = conf.at("dtype"); - } else { - auto filename = dataset_conf_.base_file; - if (!filename.compare(filename.size() - 4, 4, "fbin")) { - dataset_conf_.dtype = "float"; - } else if (!filename.compare(filename.size() - 5, 5, "u8bin")) { - dataset_conf_.dtype = "uint8"; - } else if (!filename.compare(filename.size() - 5, 5, "i8bin")) { - dataset_conf_.dtype = "int8"; - } else { - log_error("Could not determine data type of the dataset %s", filename.c_str()); - } - } -} - -void Configuration::parse_index_(const nlohmann::json& index_conf, - const nlohmann::json& search_basic_conf) -{ - const int batch_size = search_basic_conf.at("batch_size"); - const int k = search_basic_conf.at("k"); - const int run_count = search_basic_conf.at("run_count"); - - for (const auto& conf : index_conf) { - Index index; - index.name = conf.at("name"); - index.algo = conf.at("algo"); - index.build_param = conf.at("build_param"); - index.file = conf.at("file"); - index.batch_size = batch_size; - index.k = k; - index.run_count = run_count; - - if (conf.contains("multigpu")) { - for (auto it : conf.at("multigpu")) { - index.dev_list.push_back(it); - } - if (index.dev_list.empty()) { throw std::runtime_error("dev_list shouln't be empty!"); } - index.dev_list.shrink_to_fit(); - index.build_param["multigpu"] = conf["multigpu"]; - } - - if (conf.contains("refine_ratio")) { - float refine_ratio = conf.at("refine_ratio"); - if (refine_ratio <= 1.0f) { - throw runtime_error("'" + index.name + "': refine_ratio should > 1.0"); - } - index.refine_ratio = refine_ratio; - } - - for (const auto& param : conf.at("search_params")) { - index.search_params.push_back(param); - } - index.search_result_file = conf.at("search_result_file"); - - indices_.push_back(index); - } -} - -unordered_set Configuration::match_(const vector& candidates, - const string& patterns) const -{ - unordered_set matched; - for (const auto& pat : split(patterns, ',')) { - if (pat.empty()) { continue; } - - if (pat.back() == '*') { - auto len = pat.size() - 1; - for (const auto& item : candidates) { - if (item.compare(0, 
len, pat, 0, len) == 0) { matched.insert(item); } - } - } else { - for (const auto& item : candidates) { - if (item == pat) { matched.insert(item); } - } - } - } - - return matched; -} - -} // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/common/conf.h b/cpp/bench/ann/src/common/conf.h deleted file mode 100644 index 845defe94a..0000000000 --- a/cpp/bench/ann/src/common/conf.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once -#include -#include -#include -#include - -#define JSON_DIAGNOSTICS 1 -#include - -namespace raft::bench::ann { - -class Configuration { - public: - struct Index { - std::string name; - std::string algo; - nlohmann::json build_param; - std::string file; - std::vector dev_list; - - int batch_size; - int k; - int run_count; - std::vector search_params; - std::string search_result_file; - float refine_ratio{0.0f}; - }; - - struct DatasetConf { - std::string name; - std::string base_file; - // use only a subset of base_file, - // the range of rows is [subset_first_row, subset_first_row + subset_size) - // however, subset_size = 0 means using all rows after subset_first_row - // that is, the subset is [subset_first_row, #rows in base_file) - size_t subset_first_row{0}; - size_t subset_size{0}; - std::string query_file; - std::string distance; - - // data type of input dataset, possible values ["float", "int8", "uint8"] - std::string dtype; - }; - - Configuration(std::istream& conf_stream); - - DatasetConf get_dataset_conf() const { return dataset_conf_; } - std::vector get_indices(const std::string& patterns) const; - - private: - void parse_dataset_(const nlohmann::json& conf); - void parse_index_(const nlohmann::json& index_conf, const nlohmann::json& search_basic_conf); - std::unordered_set match_(const std::vector& candidates, - const std::string& patterns) const; - - DatasetConf dataset_conf_; - std::vector indices_; -}; - -} // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/common/conf.hpp b/cpp/bench/ann/src/common/conf.hpp new file mode 100644 index 0000000000..405b00a74e --- /dev/null +++ b/cpp/bench/ann/src/common/conf.hpp @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include "util.hpp" + +#include +#include +#include +#include +#include + +#define JSON_DIAGNOSTICS 1 +#include + +namespace raft::bench::ann { + +class Configuration { + public: + struct Index { + std::string name; + std::string algo; + nlohmann::json build_param; + std::string file; + std::vector dev_list; + + int batch_size; + int k; + std::vector search_params; + }; + + struct DatasetConf { + std::string name; + std::string base_file; + // use only a subset of base_file, + // the range of rows is [subset_first_row, subset_first_row + subset_size) + // however, subset_size = 0 means using all rows after subset_first_row + // that is, the subset is [subset_first_row, #rows in base_file) + size_t subset_first_row{0}; + size_t subset_size{0}; + std::string query_file; + std::string distance; + std::optional groundtruth_neighbors_file{std::nullopt}; + + // data type of input dataset, possible values ["float", "int8", "uint8"] + std::string dtype; + }; + + explicit inline Configuration(std::istream& conf_stream) + { + // to enable comments in json + auto conf = nlohmann::json::parse(conf_stream, nullptr, true, true); + + parse_dataset_(conf.at("dataset")); + parse_index_(conf.at("index"), conf.at("search_basic_param")); + } + + [[nodiscard]] inline auto get_dataset_conf() const -> DatasetConf { return dataset_conf_; } + [[nodiscard]] inline auto get_indices() const -> std::vector { return indices_; }; + + private: + inline void parse_dataset_(const nlohmann::json& conf) + { + dataset_conf_.name = conf.at("name"); + dataset_conf_.base_file = conf.at("base_file"); + dataset_conf_.query_file = conf.at("query_file"); + dataset_conf_.distance = conf.at("distance"); + + if (conf.contains("groundtruth_neighbors_file")) { + dataset_conf_.groundtruth_neighbors_file = conf.at("groundtruth_neighbors_file"); + } + if (conf.contains("subset_first_row")) { + dataset_conf_.subset_first_row = conf.at("subset_first_row"); + } + if (conf.contains("subset_size")) { dataset_conf_.subset_size = conf.at("subset_size"); } + + if (conf.contains("dtype")) { + dataset_conf_.dtype = conf.at("dtype"); + } else { + auto filename = dataset_conf_.base_file; + if (!filename.compare(filename.size() - 4, 4, "fbin")) { + dataset_conf_.dtype = "float"; + } else if (!filename.compare(filename.size() - 5, 5, "u8bin")) { + dataset_conf_.dtype = "uint8"; + } else if (!filename.compare(filename.size() - 5, 5, "i8bin")) { + dataset_conf_.dtype = "int8"; + } else { + log_error("Could not determine data type of the dataset %s", filename.c_str()); + } + } + } + inline void parse_index_(const nlohmann::json& index_conf, + const nlohmann::json& search_basic_conf) + { + const int batch_size = search_basic_conf.at("batch_size"); + const int k = search_basic_conf.at("k"); + + for (const auto& conf : index_conf) { + Index index; + index.name = conf.at("name"); + index.algo = conf.at("algo"); + index.build_param = conf.at("build_param"); + index.file = conf.at("file"); + index.batch_size = batch_size; + index.k = k; + + if (conf.contains("multigpu")) { + for (auto it : conf.at("multigpu")) { + index.dev_list.push_back(it); + } + if (index.dev_list.empty()) { throw std::runtime_error("dev_list shouln't be empty!"); } + index.dev_list.shrink_to_fit(); + index.build_param["multigpu"] = conf["multigpu"]; + } + + for (auto param : conf.at("search_params")) { + /* ### Special parameters for backward compatibility ### + + - Local values of `k` and `n_queries` take priority. + - The legacy "batch_size" renamed to `n_queries`. 
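The configuration is parsed with `nlohmann::json::parse(stream, nullptr, true, true)`, which tolerates `//` comments, and per-index search parameters fall back to the basic search parameters. A small self-contained example of both behaviours; the embedded config is made up:

#include <nlohmann/json.hpp>

#include <iostream>
#include <sstream>

int main()
{
  // The 4th argument of parse() (ignore_comments) accepts commented configs.
  std::stringstream conf_stream{R"({
    // a hypothetical, trimmed-down config
    "search_basic_param": {"batch_size": 10000, "k": 10},
    "index": [{"name": "demo", "search_params": [{"nprobe": 20}, {"nprobe": 50, "k": 100}]}]
  })"};
  auto conf = nlohmann::json::parse(conf_stream, nullptr, true, true);

  const int k          = conf.at("search_basic_param").at("k");
  const int batch_size = conf.at("search_basic_param").at("batch_size");
  for (auto param : conf.at("index").at(0).at("search_params")) {
    // Per-parameter values take priority over the basic search params.
    if (!param.contains("k")) { param["k"] = k; }
    if (!param.contains("n_queries")) { param["n_queries"] = batch_size; }
    std::cout << param.dump() << "\n";
  }
  return 0;
}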
+ - Basic search params are used otherwise. + */ + if (!param.contains("k")) { param["k"] = k; } + if (!param.contains("n_queries")) { + if (param.contains("batch_size")) { + param["n_queries"] = param["batch_size"]; + param.erase("batch_size"); + } else { + param["n_queries"] = batch_size; + } + } + index.search_params.push_back(param); + } + + indices_.push_back(index); + } + } + + DatasetConf dataset_conf_; + std::vector indices_; +}; + +} // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/common/cuda_stub.hpp b/cpp/bench/ann/src/common/cuda_stub.hpp new file mode 100644 index 0000000000..879a99697f --- /dev/null +++ b/cpp/bench/ann/src/common/cuda_stub.hpp @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +namespace raft::bench::ann { + +struct cuda_lib_handle { + void* handle{nullptr}; + explicit cuda_lib_handle() + { + handle = dlopen("libcudart.so", RTLD_NOW | RTLD_GLOBAL | RTLD_DEEPBIND | RTLD_NODELETE); + } + ~cuda_lib_handle() noexcept + { + if (handle != nullptr) { dlclose(handle); } + } + + [[nodiscard]] inline auto found() const -> bool { return handle != nullptr; } +}; + +static inline cuda_lib_handle cudart{}; + +namespace stub { + +[[gnu::weak, gnu::noinline]] cudaError_t cudaMemcpy(void* dst, + const void* src, + size_t count, + enum cudaMemcpyKind kind) +{ + return cudaSuccess; +} + +[[gnu::weak, gnu::noinline]] cudaError_t cudaMalloc(void** ptr, size_t size) +{ + *ptr = nullptr; + return cudaSuccess; +} +[[gnu::weak, gnu::noinline]] cudaError_t cudaMemset(void* devPtr, int value, size_t count) +{ + return cudaSuccess; +} +[[gnu::weak, gnu::noinline]] cudaError_t cudaFree(void* devPtr) { return cudaSuccess; } +[[gnu::weak, gnu::noinline]] cudaError_t cudaStreamCreate(cudaStream_t* pStream) +{ + *pStream = 0; + return cudaSuccess; +} +[[gnu::weak, gnu::noinline]] cudaError_t cudaStreamCreateWithFlags(cudaStream_t* pStream, + unsigned int flags) +{ + *pStream = 0; + return cudaSuccess; +} +[[gnu::weak, gnu::noinline]] cudaError_t cudaStreamDestroy(cudaStream_t pStream) +{ + return cudaSuccess; +} +[[gnu::weak, gnu::noinline]] cudaError_t cudaStreamSynchronize(cudaStream_t pStream) +{ + return cudaSuccess; +} +[[gnu::weak, gnu::noinline]] cudaError_t cudaEventCreate(cudaEvent_t* event) +{ + *event = 0; + return cudaSuccess; +} +[[gnu::weak, gnu::noinline]] cudaError_t cudaEventRecord(cudaEvent_t event, cudaStream_t stream) +{ + return cudaSuccess; +} +[[gnu::weak, gnu::noinline]] cudaError_t cudaEventSynchronize(cudaEvent_t event) +{ + return cudaSuccess; +} +[[gnu::weak, gnu::noinline]] cudaError_t cudaEventElapsedTime(float* ms, + cudaEvent_t start, + cudaEvent_t end) +{ + *ms = 0; + return cudaSuccess; +} +[[gnu::weak, gnu::noinline]] cudaError_t cudaEventDestroy(cudaEvent_t event) { return cudaSuccess; } +[[gnu::weak, gnu::noinline]] cudaError_t cudaGetDevice(int* device) +{ + *device = 0; + return cudaSuccess; +}; +[[gnu::weak, gnu::noinline]] 
cudaError_t cudaDriverGetVersion(int* driver) +{ + *driver = 0; + return cudaSuccess; +}; +[[gnu::weak, gnu::noinline]] cudaError_t cudaRuntimeGetVersion(int* runtime) +{ + *runtime = 0; + return cudaSuccess; +}; +[[gnu::weak, gnu::noinline]] cudaError_t cudaGetDeviceProperties(struct cudaDeviceProp* prop, + int device) +{ + *prop = cudaDeviceProp{}; + return cudaSuccess; +} + +} // namespace stub + +#define RAFT_DECLARE_CUDART(fun) \ + static inline decltype(&stub::fun) fun = \ + cudart.found() ? reinterpret_cast(dlsym(cudart.handle, #fun)) \ + : &stub::fun + +RAFT_DECLARE_CUDART(cudaMemcpy); +RAFT_DECLARE_CUDART(cudaMalloc); +RAFT_DECLARE_CUDART(cudaMemset); +RAFT_DECLARE_CUDART(cudaFree); +RAFT_DECLARE_CUDART(cudaStreamCreate); +RAFT_DECLARE_CUDART(cudaStreamCreateWithFlags); +RAFT_DECLARE_CUDART(cudaStreamDestroy); +RAFT_DECLARE_CUDART(cudaStreamSynchronize); +RAFT_DECLARE_CUDART(cudaEventCreate); +RAFT_DECLARE_CUDART(cudaEventRecord); +RAFT_DECLARE_CUDART(cudaEventSynchronize); +RAFT_DECLARE_CUDART(cudaEventElapsedTime); +RAFT_DECLARE_CUDART(cudaEventDestroy); +RAFT_DECLARE_CUDART(cudaGetDevice); +RAFT_DECLARE_CUDART(cudaDriverGetVersion); +RAFT_DECLARE_CUDART(cudaRuntimeGetVersion); +RAFT_DECLARE_CUDART(cudaGetDeviceProperties); + +#undef RAFT_DECLARE_CUDART + +}; // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/common/dataset.h b/cpp/bench/ann/src/common/dataset.hpp similarity index 85% rename from cpp/bench/ann/src/common/dataset.h rename to cpp/bench/ann/src/common/dataset.hpp index ae05cd02a1..7fa82a632f 100644 --- a/cpp/bench/ann/src/common/dataset.h +++ b/cpp/bench/ann/src/common/dataset.hpp @@ -15,11 +15,10 @@ */ #pragma once -#include +#include "util.hpp" #ifndef CPU_ONLY #include -#include #else typedef uint16_t half; #endif @@ -29,7 +28,9 @@ typedef uint16_t half; #include #include +#include #include +#include #include #include #include @@ -54,7 +55,8 @@ class BinFile { uint32_t subset_size = 0); ~BinFile() { - if (fp_) { fclose(fp_); } + if (mapped_ptr_ != nullptr) { unmap(); } + if (fp_ != nullptr) { fclose(fp_); } } BinFile(const BinFile&) = delete; BinFile& operator=(const BinFile&) = delete; @@ -101,6 +103,7 @@ class BinFile { int fid = fileno(fp_); mapped_ptr_ = mmap(nullptr, file_size_, PROT_READ, MAP_PRIVATE, fid, 0); if (mapped_ptr_ == MAP_FAILED) { + mapped_ptr_ = nullptr; throw std::runtime_error("mmap error: Value of errno " + std::to_string(errno) + ", " + std::string(strerror(errno))); } @@ -124,11 +127,11 @@ class BinFile { uint32_t subset_first_row_; uint32_t subset_size_; - mutable FILE* fp_; + mutable FILE* fp_{nullptr}; mutable uint32_t nrows_; mutable uint32_t ndims_; mutable size_t file_size_; - mutable void* mapped_ptr_; + mutable void* mapped_ptr_{nullptr}; }; template @@ -254,6 +257,7 @@ class Dataset { std::string name() const { return name_; } std::string distance() const { return distance_; } virtual int dim() const = 0; + virtual uint32_t max_k() const = 0; virtual size_t base_set_size() const = 0; virtual size_t query_set_size() const = 0; @@ -271,12 +275,37 @@ class Dataset { return query_set_; } + const int32_t* gt_set() const + { + if (!gt_set_) { load_gt_set_(); } + return gt_set_; + } + const T* base_set_on_gpu() const; const T* query_set_on_gpu() const; const T* mapped_base_set() const; + auto query_set(MemoryType memory_type) const -> const T* + { + switch (memory_type) { + case MemoryType::Device: return query_set_on_gpu(); + default: return query_set(); + } + } + + auto base_set(MemoryType memory_type) const -> const T* + { 
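`RAFT_DECLARE_CUDART` resolves each cudart symbol through `dlsym` when `libcudart.so` was found and otherwise falls back to the weak no-op stubs, which is what lets one executable drive CPU-only algorithms. A stripped-down sketch of the same resolve-or-stub idea; `libm`/`cosf` stand in for the CUDA runtime purely for illustration:

#include <dlfcn.h>

#include <cstdio>

// Stub used when the real library is unavailable.
static float cosf_stub(float) { return 1.0f; }

using cosf_t = float (*)(float);

int main()
{
  void* handle = dlopen("libm.so.6", RTLD_NOW | RTLD_GLOBAL);
  cosf_t fn    = handle != nullptr
                   ? reinterpret_cast<cosf_t>(dlsym(handle, "cosf"))
                   : &cosf_stub;
  if (fn == nullptr) { fn = &cosf_stub; }  // dlsym itself may also fail
  std::printf("cosf(0) = %f (%s)\n", fn(0.0f),
              handle ? "resolved from shared library" : "stub fallback");
  if (handle != nullptr) { dlclose(handle); }
  return 0;
}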
+ switch (memory_type) { + case MemoryType::Device: return base_set_on_gpu(); + case MemoryType::Host: return base_set(); + case MemoryType::HostMmap: return mapped_base_set(); + default: return nullptr; + } + } + protected: virtual void load_base_set_() const = 0; + virtual void load_gt_set_() const = 0; virtual void load_query_set_() const = 0; virtual void map_base_set_() const = 0; @@ -288,6 +317,7 @@ class Dataset { mutable T* d_base_set_ = nullptr; mutable T* d_query_set_ = nullptr; mutable T* mapped_base_set_ = nullptr; + mutable int32_t* gt_set_ = nullptr; }; template @@ -295,6 +325,7 @@ Dataset::~Dataset() { delete[] base_set_; delete[] query_set_; + delete[] gt_set_; #ifndef CPU_ONLY if (d_base_set_) { cudaFree(d_base_set_); } if (d_query_set_) { cudaFree(d_query_set_); } @@ -307,9 +338,8 @@ const T* Dataset::base_set_on_gpu() const #ifndef CPU_ONLY if (!d_base_set_) { base_set(); - RAFT_CUDA_TRY(cudaMalloc((void**)&d_base_set_, base_set_size() * dim() * sizeof(T))); - RAFT_CUDA_TRY(cudaMemcpy( - d_base_set_, base_set_, base_set_size() * dim() * sizeof(T), cudaMemcpyHostToDevice)); + cudaMalloc((void**)&d_base_set_, base_set_size() * dim() * sizeof(T)); + cudaMemcpy(d_base_set_, base_set_, base_set_size() * dim() * sizeof(T), cudaMemcpyHostToDevice); } #endif return d_base_set_; @@ -321,9 +351,9 @@ const T* Dataset::query_set_on_gpu() const #ifndef CPU_ONLY if (!d_query_set_) { query_set(); - RAFT_CUDA_TRY(cudaMalloc((void**)&d_query_set_, query_set_size() * dim() * sizeof(T))); - RAFT_CUDA_TRY(cudaMemcpy( - d_query_set_, query_set_, query_set_size() * dim() * sizeof(T), cudaMemcpyHostToDevice)); + cudaMalloc((void**)&d_query_set_, query_set_size() * dim() * sizeof(T)); + cudaMemcpy( + d_query_set_, query_set_, query_set_size() * dim() * sizeof(T), cudaMemcpyHostToDevice); } #endif return d_query_set_; @@ -344,27 +374,28 @@ class BinDataset : public Dataset { size_t subset_first_row, size_t subset_size, const std::string& query_file, - const std::string& distance); - ~BinDataset() - { - if (this->mapped_base_set_) { base_file_.unmap(); } - } + const std::string& distance, + const std::optional& groundtruth_neighbors_file); int dim() const override; + uint32_t max_k() const override; size_t base_set_size() const override; size_t query_set_size() const override; private: void load_base_set_() const override; void load_query_set_() const override; + void load_gt_set_() const override; void map_base_set_() const override; mutable int dim_ = 0; + mutable uint32_t max_k_ = 0; mutable size_t base_set_size_ = 0; mutable size_t query_set_size_ = 0; BinFile base_file_; BinFile query_file_; + std::optional> gt_file_{std::nullopt}; }; template @@ -373,11 +404,15 @@ BinDataset::BinDataset(const std::string& name, size_t subset_first_row, size_t subset_size, const std::string& query_file, - const std::string& distance) + const std::string& distance, + const std::optional& groundtruth_neighbors_file) : Dataset(name, distance), base_file_(base_file, "r", subset_first_row, subset_size), query_file_(query_file, "r") { + if (groundtruth_neighbors_file.has_value()) { + gt_file_.emplace(groundtruth_neighbors_file.value(), "r"); + } } template @@ -389,6 +424,13 @@ int BinDataset::dim() const return dim_; } +template +uint32_t BinDataset::max_k() const +{ + if (!this->gt_set_) { load_gt_set_(); } + return max_k_; +} + template size_t BinDataset::query_set_size() const { @@ -437,6 +479,19 @@ void BinDataset::load_query_set_() const query_file_.read(this->query_set_); } +template +void 
BinDataset::load_gt_set_() const +{ + if (gt_file_.has_value()) { + size_t queries; + int k; + gt_file_->get_shape(&queries, &k); + this->gt_set_ = new std::int32_t[queries * k]; + gt_file_->read(this->gt_set_); + max_k_ = k; + } +} + template void BinDataset::map_base_set_() const { diff --git a/cpp/bench/ann/src/common/util.cpp b/cpp/bench/ann/src/common/util.cpp deleted file mode 100644 index 17636f76d7..0000000000 --- a/cpp/bench/ann/src/common/util.cpp +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "util.h" - -#include -#include - -#include -#include - -namespace raft::bench::ann { - -std::vector split(const std::string& s, char delimiter) -{ - std::vector tokens; - std::string token; - std::istringstream iss(s); - while (getline(iss, token, delimiter)) { - if (!token.empty()) { tokens.push_back(token); } - } - return tokens; -} - -bool file_exists(const std::string& filename) -{ - struct stat statbuf; - if (stat(filename.c_str(), &statbuf) != 0) { return false; } - return S_ISREG(statbuf.st_mode); -} - -bool dir_exists(const std::string& dir) -{ - struct stat statbuf; - if (stat(dir.c_str(), &statbuf) != 0) { return false; } - return S_ISDIR(statbuf.st_mode); -} - -bool create_dir(const std::string& dir) -{ - const auto path = split(dir, '/'); - - std::string cwd; - if (!dir.empty() && dir[0] == '/') { cwd += '/'; } - - for (const auto& p : path) { - cwd += p + "/"; - if (!dir_exists(cwd)) { - int ret = mkdir(cwd.c_str(), S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); - if (ret != 0) { return false; } - } - } - return true; -} - -} // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/common/util.h b/cpp/bench/ann/src/common/util.h deleted file mode 100644 index 290bf4cea9..0000000000 --- a/cpp/bench/ann/src/common/util.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
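`map_base_set_` goes through the `BinFile` mmap path, i.e. the base file is memory-mapped rather than read into RAM when an algorithm asks for `MemoryType::HostMmap`. A minimal sketch of read-only memory mapping with POSIX `mmap`; the file name is illustrative:

#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

#include <cstdio>

int main()
{
  const char* path = "base.1B.u8bin";  // illustrative file name
  int fd = open(path, O_RDONLY);
  if (fd < 0) { std::perror("open"); return 1; }

  struct stat st{};
  if (fstat(fd, &st) != 0) { std::perror("fstat"); close(fd); return 1; }

  // Map the whole file read-only; pages are faulted in lazily on first access.
  void* ptr = mmap(nullptr, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
  if (ptr == MAP_FAILED) { std::perror("mmap"); close(fd); return 1; }

  const unsigned char* data = static_cast<const unsigned char*>(ptr);
  std::printf("first byte: %u, file size: %lld bytes\n",
              static_cast<unsigned>(data[0]), static_cast<long long>(st.st_size));

  munmap(ptr, st.st_size);
  close(fd);
  return 0;
}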
- */ -#pragma once - -#include -#include -#include -#include -#include -#include - -namespace raft::bench::ann { - -class Timer { - public: - Timer() { reset(); } - void reset() { start_time_ = std::chrono::steady_clock::now(); } - float elapsed_ms() - { - auto end_time = std::chrono::steady_clock::now(); - auto dur = - std::chrono::duration_cast>(end_time - start_time_); - return dur.count(); - } - - private: - std::chrono::steady_clock::time_point start_time_; -}; - -std::vector split(const std::string& s, char delimiter); - -bool file_exists(const std::string& filename); -bool dir_exists(const std::string& dir); -bool create_dir(const std::string& dir); - -template -void log_(const char* level, Ts... vs) -{ - char buf[20]; - std::time_t now = std::time(nullptr); - std::strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", std::localtime(&now)); - printf("%s [%s] ", buf, level); - printf(vs...); - printf("\n"); - fflush(stdout); -} - -template -void log_info(Ts... vs) -{ - log_("info", vs...); -} - -template -void log_warn(Ts... vs) -{ - log_("warn", vs...); -} - -template -void log_error(Ts... vs) -{ - log_("error", vs...); -} - -} // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/common/util.hpp b/cpp/bench/ann/src/common/util.hpp new file mode 100644 index 0000000000..88a9b4bb7a --- /dev/null +++ b/cpp/bench/ann/src/common/util.hpp @@ -0,0 +1,318 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include "ann_types.hpp" + +#include "cuda_stub.hpp" +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace raft::bench::ann { + +template +struct buf { + MemoryType memory_type; + std::size_t size; + T* data; + buf(MemoryType memory_type, std::size_t size) + : memory_type(memory_type), size(size), data(nullptr) + { + switch (memory_type) { + case MemoryType::Device: { + cudaMalloc(reinterpret_cast(&data), size * sizeof(T)); + cudaMemset(data, 0, size * sizeof(T)); + } break; + default: { + data = reinterpret_cast(malloc(size * sizeof(T))); + std::memset(data, 0, size * sizeof(T)); + } + } + } + ~buf() noexcept + { + if (data == nullptr) { return; } + switch (memory_type) { + case MemoryType::Device: { + cudaFree(data); + } break; + default: { + free(data); + } + } + } + + [[nodiscard]] auto move(MemoryType target_memory_type) -> buf + { + buf r{target_memory_type, size}; + if ((memory_type == MemoryType::Device && target_memory_type != MemoryType::Device) || + (memory_type != MemoryType::Device && target_memory_type == MemoryType::Device)) { + cudaMemcpy(r.data, data, size * sizeof(T), cudaMemcpyDefault); + } else { + std::swap(data, r.data); + } + return r; + } +}; + +struct cuda_timer { + private: + cudaStream_t stream_; + cudaEvent_t start_; + cudaEvent_t stop_; + double total_time_{0}; + + public: + struct cuda_lap { + private: + cudaStream_t stream_; + cudaEvent_t start_; + cudaEvent_t stop_; + double& total_time_; + + public: + cuda_lap(cudaStream_t stream, cudaEvent_t start, cudaEvent_t stop, double& total_time) + : start_(start), stop_(stop), stream_(stream), total_time_(total_time) + { + cudaStreamSynchronize(stream_); + cudaEventRecord(start_, stream_); + } + cuda_lap() = delete; + + ~cuda_lap() noexcept + { + cudaEventRecord(stop_, stream_); + cudaEventSynchronize(stop_); + float milliseconds = 0.0f; + cudaEventElapsedTime(&milliseconds, start_, stop_); + total_time_ += milliseconds / 1000.0; + } + }; + + cuda_timer() + { + cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking); + cudaEventCreate(&stop_); + cudaEventCreate(&start_); + } + + ~cuda_timer() noexcept + { + cudaEventDestroy(start_); + cudaEventDestroy(stop_); + cudaStreamDestroy(stream_); + } + + [[nodiscard]] auto stream() const -> cudaStream_t { return stream_; } + + [[nodiscard]] auto total_time() const -> double { return total_time_; } + + [[nodiscard]] auto lap() -> cuda_timer::cuda_lap + { + return cuda_lap{stream_, start_, stop_, total_time_}; + } +}; + +inline auto cuda_info() +{ + int dev, driver = 0, runtime = 0; + cudaDriverGetVersion(&driver); + cudaRuntimeGetVersion(&runtime); + + cudaDeviceProp device_prop; + cudaGetDevice(&dev); + cudaGetDeviceProperties(&device_prop, dev); + std::vector> props; + props.emplace_back("gpu_name", std::string(device_prop.name)); + props.emplace_back("gpu_sm_count", std::to_string(device_prop.multiProcessorCount)); + props.emplace_back("gpu_sm_freq", std::to_string(device_prop.clockRate * 1e3)); + props.emplace_back("gpu_mem_freq", std::to_string(device_prop.memoryClockRate * 1e3)); + props.emplace_back("gpu_mem_bus_width", std::to_string(device_prop.memoryBusWidth)); + props.emplace_back("gpu_mem_global_size", std::to_string(device_prop.totalGlobalMem)); + props.emplace_back("gpu_mem_shared_size", std::to_string(device_prop.sharedMemPerMultiprocessor)); + props.emplace_back("gpu_driver_version", + std::to_string(driver / 1000) + "." 
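`cuda_timer` brackets every lap with CUDA events on its own non-blocking stream and accumulates the elapsed time. A bare-bones version of that event-based timing around an asynchronous copy; sizes are arbitrary and error checking is omitted:

#include <cuda_runtime.h>

#include <cstdio>
#include <vector>

int main()
{
  const size_t n = 1 << 24;
  std::vector<float> h_x(n, 1.0f);
  float* d_x = nullptr;
  cudaMalloc(reinterpret_cast<void**>(&d_x), n * sizeof(float));

  cudaStream_t stream;
  cudaEvent_t start, stop;
  cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking);
  cudaEventCreate(&start);
  cudaEventCreate(&stop);

  // One "lap": record start, enqueue asynchronous work, record stop, then read the elapsed time.
  cudaEventRecord(start, stream);
  cudaMemcpyAsync(d_x, h_x.data(), n * sizeof(float), cudaMemcpyHostToDevice, stream);
  cudaEventRecord(stop, stream);
  cudaEventSynchronize(stop);

  float ms = 0.0f;
  cudaEventElapsedTime(&ms, start, stop);
  std::printf("GPU time for the copy: %.3f ms\n", ms);

  cudaEventDestroy(start);
  cudaEventDestroy(stop);
  cudaStreamDestroy(stream);
  cudaFree(d_x);
  return 0;
}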
+ std::to_string((driver % 100) / 10)); + props.emplace_back("gpu_runtime_version", + std::to_string(runtime / 1000) + "." + std::to_string((runtime % 100) / 10)); + return props; +} + +struct nvtx_case { + private: + std::string case_name_; + std::array iter_name_{0}; + nvtxDomainHandle_t domain_; + int64_t iteration_ = 0; + nvtxEventAttributes_t case_attrib_{0}; + nvtxEventAttributes_t iter_attrib_{0}; + + public: + struct nvtx_lap { + private: + nvtxDomainHandle_t domain_; + + public: + nvtx_lap(nvtxDomainHandle_t domain, nvtxEventAttributes_t* attr) : domain_(domain) + { + nvtxDomainRangePushEx(domain_, attr); + } + nvtx_lap() = delete; + ~nvtx_lap() noexcept { nvtxDomainRangePop(domain_); } + }; + + explicit nvtx_case(std::string case_name) + : case_name_(std::move(case_name)), domain_(nvtxDomainCreateA("ANN benchmark")) + { + case_attrib_.version = NVTX_VERSION; + iter_attrib_.version = NVTX_VERSION; + case_attrib_.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; + iter_attrib_.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; + case_attrib_.colorType = NVTX_COLOR_ARGB; + iter_attrib_.colorType = NVTX_COLOR_ARGB; + case_attrib_.messageType = NVTX_MESSAGE_TYPE_ASCII; + iter_attrib_.messageType = NVTX_MESSAGE_TYPE_ASCII; + case_attrib_.message.ascii = case_name_.c_str(); + auto c = std::hash{}(case_name_); + case_attrib_.color = c | 0xA0A0A0; + nvtxDomainRangePushEx(domain_, &case_attrib_); + } + + ~nvtx_case() + { + nvtxDomainRangePop(domain_); + nvtxDomainDestroy(domain_); + } + + [[nodiscard]] auto lap() -> nvtx_case::nvtx_lap + { + auto i = iteration_++; + uint32_t c = (i % 5); + uint32_t r = 150 + c * 20; + uint32_t g = 200 + c * 10; + uint32_t b = 220 + c * 5; + std::snprintf(iter_name_.data(), iter_name_.size(), "Lap %zd", i); + iter_attrib_.message.ascii = iter_name_.data(); + iter_attrib_.color = (r << 16) + (g << 8) + b; + return nvtx_lap{domain_, &iter_attrib_}; + } +}; + +inline std::vector split(const std::string& s, char delimiter) +{ + std::vector tokens; + std::string token; + std::istringstream iss(s); + while (getline(iss, token, delimiter)) { + if (!token.empty()) { tokens.push_back(token); } + } + return tokens; +} + +inline bool file_exists(const std::string& filename) +{ + struct stat statbuf; + if (stat(filename.c_str(), &statbuf) != 0) { return false; } + return S_ISREG(statbuf.st_mode); +} + +inline bool dir_exists(const std::string& dir) +{ + struct stat statbuf; + if (stat(dir.c_str(), &statbuf) != 0) { return false; } + return S_ISDIR(statbuf.st_mode); +} + +inline bool create_dir(const std::string& dir) +{ + const auto path = split(dir, '/'); + + std::string cwd; + if (!dir.empty() && dir[0] == '/') { cwd += '/'; } + + for (const auto& p : path) { + cwd += p + "/"; + if (!dir_exists(cwd)) { + int ret = mkdir(cwd.c_str(), S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); + if (ret != 0) { return false; } + } + } + return true; +} + +inline void make_sure_parent_dir_exists(const std::string& file_path) +{ + const auto pos = file_path.rfind('/'); + if (pos != std::string::npos) { + auto dir = file_path.substr(0, pos); + if (!dir_exists(dir)) { create_dir(dir); } + } +} + +inline auto combine_path(const std::string& dir, const std::string& path) +{ + std::filesystem::path p_dir(dir); + std::filesystem::path p_suf(path); + return (p_dir / p_suf).string(); +} + +template +void log_(const char* level, const Ts&... 
vs) +{ + char buf[20]; + std::time_t now = std::time(nullptr); + std::strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", std::localtime(&now)); + printf("%s [%s] ", buf, level); + if constexpr (sizeof...(Ts) == 1) { + printf("%s", vs...); + } else { + printf(vs...); + } + printf("\n"); + fflush(stdout); +} + +template +void log_info(Ts&&... vs) +{ + log_("info", std::forward(vs)...); +} + +template +void log_warn(Ts&&... vs) +{ + log_("warn", std::forward(vs)...); +} + +template +void log_error(Ts&&... vs) +{ + log_("error", std::forward(vs)...); +} + +} // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/faiss/faiss_benchmark.cu b/cpp/bench/ann/src/faiss/faiss_benchmark.cu index 0bad86905b..2733a9419c 100644 --- a/cpp/bench/ann/src/faiss/faiss_benchmark.cu +++ b/cpp/bench/ann/src/faiss/faiss_benchmark.cu @@ -97,7 +97,6 @@ template std::unique_ptr> create_algo(const std::string& algo, const std::string& distance, int dim, - float refine_ratio, const nlohmann::json& conf, const std::vector& dev_list) { @@ -123,7 +122,6 @@ std::unique_ptr> create_algo(const std::string& algo, if (!ann) { throw std::runtime_error("invalid algo: '" + algo + "'"); } - if (refine_ratio > 1.0) {} return ann; } @@ -145,6 +143,6 @@ std::unique_ptr::AnnSearchParam> create_search } // namespace raft::bench::ann -#include "../common/benchmark.hpp" - -int main(int argc, char** argv) { return raft::bench::ann::run_main(argc, argv); } +REGISTER_ALGO_INSTANCE(float); +REGISTER_ALGO_INSTANCE(std::int8_t); +REGISTER_ALGO_INSTANCE(std::uint8_t); diff --git a/cpp/bench/ann/src/faiss/faiss_wrapper.h b/cpp/bench/ann/src/faiss/faiss_wrapper.h index 8cfc26ea5b..6c367ba522 100644 --- a/cpp/bench/ann/src/faiss/faiss_wrapper.h +++ b/cpp/bench/ann/src/faiss/faiss_wrapper.h @@ -16,6 +16,10 @@ #ifndef FAISS_WRAPPER_H_ #define FAISS_WRAPPER_H_ +#include "../common/ann_types.hpp" + +#include + #include #include #include @@ -35,10 +39,6 @@ #include #include -#include "../common/ann_types.hpp" -#include "../common/benchmark_util.hpp" -#include - namespace { faiss::MetricType parse_metric_type(raft::bench::ann::Metric metric) @@ -102,9 +102,8 @@ class FaissGpu : public ANN { { AlgoProperty property; // to enable building big dataset which is larger than GPU memory - property.dataset_memory_type = MemoryType::Host; - property.query_memory_type = MemoryType::Device; - property.need_dataset_when_search = false; + property.dataset_memory_type = MemoryType::Host; + property.query_memory_type = MemoryType::Device; return property; } diff --git a/cpp/bench/ann/src/ggnn/ggnn_benchmark.cu b/cpp/bench/ann/src/ggnn/ggnn_benchmark.cu index 8072cd857c..636bf753b1 100644 --- a/cpp/bench/ann/src/ggnn/ggnn_benchmark.cu +++ b/cpp/bench/ann/src/ggnn/ggnn_benchmark.cu @@ -84,7 +84,6 @@ template std::unique_ptr> create_algo(const std::string& algo, const std::string& distance, int dim, - float refine_ratio, const nlohmann::json& conf, const std::vector& dev_list) { @@ -101,7 +100,6 @@ std::unique_ptr> create_algo(const std::string& algo, if (algo == "ggnn") { ann = make_algo(metric, dim, conf); } if (!ann) { throw std::runtime_error("invalid algo: '" + algo + "'"); } - if (refine_ratio > 1.0) {} return ann; } @@ -120,6 +118,6 @@ std::unique_ptr::AnnSearchParam> create_search } // namespace raft::bench::ann -#include "../common/benchmark.hpp" - -int main(int argc, char** argv) { return raft::bench::ann::run_main(argc, argv); } \ No newline at end of file +REGISTER_ALGO_INSTANCE(float); +REGISTER_ALGO_INSTANCE(std::int8_t); 
+REGISTER_ALGO_INSTANCE(std::uint8_t); diff --git a/cpp/bench/ann/src/ggnn/ggnn_wrapper.cuh b/cpp/bench/ann/src/ggnn/ggnn_wrapper.cuh index fd8fe0f2ec..44986980fe 100644 --- a/cpp/bench/ann/src/ggnn/ggnn_wrapper.cuh +++ b/cpp/bench/ann/src/ggnn/ggnn_wrapper.cuh @@ -16,14 +16,14 @@ #pragma once -#include -#include - #include "../common/ann_types.hpp" -#include "../common/benchmark_util.hpp" + #include #include +#include +#include + namespace raft::bench::ann { template @@ -50,6 +50,7 @@ class Ggnn : public ANN { int max_iterations{400}; int cache_size{512}; int sorted_size{256}; + auto needs_dataset() const -> bool override { return true; } }; Ggnn(Metric metric, int dim, const BuildParam& param); @@ -138,9 +139,8 @@ class GgnnImpl : public ANN { AlgoProperty get_property() const override { AlgoProperty property; - property.dataset_memory_type = MemoryType::Device; - property.query_memory_type = MemoryType::Device; - property.need_dataset_when_search = true; + property.dataset_memory_type = MemoryType::Device; + property.query_memory_type = MemoryType::Device; return property; } diff --git a/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp b/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp index cd823e8a69..bb93a83117 100644 --- a/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp +++ b/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp @@ -14,6 +14,8 @@ * limitations under the License. */ +#include "../common/ann_types.hpp" + #include #include #include @@ -22,9 +24,6 @@ #include #include -#include "../common/benchmark_util.hpp" - -#include "../common/ann_types.hpp" #undef WARP_SIZE #include "hnswlib_wrapper.h" #define JSON_DIAGNOSTICS 1 @@ -76,7 +75,6 @@ template std::unique_ptr> create_algo(const std::string& algo, const std::string& distance, int dim, - float refine_ratio, const nlohmann::json& conf, const std::vector& dev_list) { @@ -95,8 +93,6 @@ std::unique_ptr> create_algo(const std::string& algo, } if (!ann) { throw std::runtime_error("invalid algo: '" + algo + "'"); } - - if (refine_ratio > 1.0) {} return ann; } @@ -115,6 +111,6 @@ std::unique_ptr::AnnSearchParam> create_search }; // namespace raft::bench::ann -#include "../common/benchmark.hpp" - -int main(int argc, char** argv) { return raft::bench::ann::run_main(argc, argv); } \ No newline at end of file +REGISTER_ALGO_INSTANCE(float); +REGISTER_ALGO_INSTANCE(std::int8_t); +REGISTER_ALGO_INSTANCE(std::uint8_t); diff --git a/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h b/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h index c5c3a4a2a6..2a7a984a8c 100644 --- a/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h +++ b/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h @@ -191,9 +191,8 @@ class HnswLib : public ANN { AlgoProperty get_property() const override { AlgoProperty property; - property.dataset_memory_type = MemoryType::Host; - property.query_memory_type = MemoryType::Host; - property.need_dataset_when_search = false; + property.dataset_memory_type = MemoryType::Host; + property.query_memory_type = MemoryType::Host; return property; } diff --git a/cpp/bench/ann/src/raft/raft_benchmark.cu b/cpp/bench/ann/src/raft/raft_benchmark.cu index aa055db750..3245e0e064 100644 --- a/cpp/bench/ann/src/raft/raft_benchmark.cu +++ b/cpp/bench/ann/src/raft/raft_benchmark.cu @@ -14,6 +14,8 @@ * limitations under the License. 
*/ +#include "../common/ann_types.hpp" + #include #include #include @@ -22,8 +24,6 @@ #include #include -#include "../common/ann_types.hpp" -#include "../common/benchmark_util.hpp" #undef WARP_SIZE #ifdef RAFT_ANN_BENCH_USE_RAFT_BFKNN #include "raft_wrapper.h" @@ -120,6 +120,10 @@ void parse_search_param(const nlohmann::json& conf, // set half as default param.pq_param.lut_dtype = CUDA_R_16F; } + if (conf.contains("refine_ratio")) { + param.refine_ratio = conf.at("refine_ratio"); + if (param.refine_ratio < 1.0f) { throw std::runtime_error("refine_ratio should be >= 1.0"); } + } } #endif @@ -165,7 +169,6 @@ template std::unique_ptr> create_algo(const std::string& algo, const std::string& distance, int dim, - float refine_ratio, const nlohmann::json& conf, const std::vector& dev_list) { @@ -194,8 +197,7 @@ std::unique_ptr> create_algo(const std::string& algo, if (algo == "raft_ivf_pq") { typename raft::bench::ann::RaftIvfPQ::BuildParam param; parse_build_param(conf, param); - ann = - std::make_unique>(metric, dim, param, refine_ratio); + ann = std::make_unique>(metric, dim, param); } #endif #ifdef RAFT_ANN_BENCH_USE_RAFT_CAGRA @@ -207,7 +209,6 @@ std::unique_ptr> create_algo(const std::string& algo, #endif if (!ann) { throw std::runtime_error("invalid algo: '" + algo + "'"); } - if (refine_ratio > 1.0) {} return ann; } @@ -249,6 +250,6 @@ std::unique_ptr::AnnSearchParam> create_search }; // namespace raft::bench::ann -#include "../common/benchmark.hpp" - -int main(int argc, char** argv) { return raft::bench::ann::run_main(argc, argv); } +REGISTER_ALGO_INSTANCE(float); +REGISTER_ALGO_INSTANCE(std::int8_t); +REGISTER_ALGO_INSTANCE(std::uint8_t); diff --git a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h index d47de1eeac..4e3d3a7a58 100644 --- a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h +++ b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h @@ -48,11 +48,23 @@ class RaftCagra : public ANN { struct SearchParam : public AnnSearchParam { raft::neighbors::experimental::cagra::search_params p; + auto needs_dataset() const -> bool override { return true; } }; using BuildParam = raft::neighbors::cagra::index_params; - RaftCagra(Metric metric, int dim, const BuildParam& param); + RaftCagra(Metric metric, int dim, const BuildParam& param) + : ANN(metric, dim), + index_params_(param), + dimension_(dim), + mr_(rmm::mr::get_current_device_resource(), 1024 * 1024 * 1024ull) + { + rmm::mr::set_current_device_resource(&mr_); + index_params_.metric = parse_metric_type(metric); + RAFT_CUDA_TRY(cudaGetDevice(&device_)); + } + + ~RaftCagra() noexcept { rmm::mr::set_current_device_resource(mr_.get_upstream()); } void build(const T* dataset, size_t nrow, cudaStream_t stream) final; @@ -71,38 +83,24 @@ class RaftCagra : public ANN { AlgoProperty get_property() const override { AlgoProperty property; - property.dataset_memory_type = MemoryType::HostMmap; - property.query_memory_type = MemoryType::Device; - property.need_dataset_when_search = true; + property.dataset_memory_type = MemoryType::HostMmap; + property.query_memory_type = MemoryType::Device; return property; } void save(const std::string& file) const override; void load(const std::string&) override; - ~RaftCagra() noexcept { rmm::mr::set_current_device_resource(mr_.get_upstream()); } - private: + // `mr_` must go first to make sure it dies last + rmm::mr::pool_memory_resource mr_; raft::device_resources handle_; BuildParam index_params_; raft::neighbors::cagra::search_params search_params_; std::optional> index_; int 
device_; int dimension_; - rmm::mr::pool_memory_resource mr_; }; -template -RaftCagra::RaftCagra(Metric metric, int dim, const BuildParam& param) - : ANN(metric, dim), - index_params_(param), - dimension_(dim), - mr_(rmm::mr::get_current_device_resource(), 1024 * 1024 * 1024ull) -{ - rmm::mr::set_current_device_resource(&mr_); - index_params_.metric = parse_metric_type(metric); - RAFT_CUDA_TRY(cudaGetDevice(&device_)); -} - template void RaftCagra::build(const T* dataset, size_t nrow, cudaStream_t) { diff --git a/cpp/bench/ann/src/raft/raft_ivf_flat_wrapper.h b/cpp/bench/ann/src/raft/raft_ivf_flat_wrapper.h index 42fb9bd4a1..f249eb0395 100644 --- a/cpp/bench/ann/src/raft/raft_ivf_flat_wrapper.h +++ b/cpp/bench/ann/src/raft/raft_ivf_flat_wrapper.h @@ -52,7 +52,19 @@ class RaftIvfFlatGpu : public ANN { using BuildParam = raft::neighbors::ivf_flat::index_params; - RaftIvfFlatGpu(Metric metric, int dim, const BuildParam& param); + RaftIvfFlatGpu(Metric metric, int dim, const BuildParam& param) + : ANN(metric, dim), + index_params_(param), + dimension_(dim), + mr_(rmm::mr::get_current_device_resource(), 1024 * 1024 * 1024ull) + { + index_params_.metric = parse_metric_type(metric); + index_params_.conservative_memory_allocation = true; + rmm::mr::set_current_device_resource(&mr_); + RAFT_CUDA_TRY(cudaGetDevice(&device_)); + } + + ~RaftIvfFlatGpu() noexcept { rmm::mr::set_current_device_resource(mr_.get_upstream()); } void build(const T* dataset, size_t nrow, cudaStream_t stream) final; @@ -71,39 +83,24 @@ class RaftIvfFlatGpu : public ANN { AlgoProperty get_property() const override { AlgoProperty property; - property.dataset_memory_type = MemoryType::Device; - property.query_memory_type = MemoryType::Device; - property.need_dataset_when_search = false; + property.dataset_memory_type = MemoryType::Device; + property.query_memory_type = MemoryType::Device; return property; } void save(const std::string& file) const override; void load(const std::string&) override; - ~RaftIvfFlatGpu() noexcept { rmm::mr::set_current_device_resource(mr_.get_upstream()); } - private: + // `mr_` must go first to make sure it dies last + rmm::mr::pool_memory_resource mr_; raft::device_resources handle_; BuildParam index_params_; raft::neighbors::ivf_flat::search_params search_params_; std::optional> index_; int device_; int dimension_; - rmm::mr::pool_memory_resource mr_; }; -template -RaftIvfFlatGpu::RaftIvfFlatGpu(Metric metric, int dim, const BuildParam& param) - : ANN(metric, dim), - index_params_(param), - dimension_(dim), - mr_(rmm::mr::get_current_device_resource(), 1024 * 1024 * 1024ull) -{ - index_params_.metric = parse_metric_type(metric); - index_params_.conservative_memory_allocation = true; - rmm::mr::set_current_device_resource(&mr_); - RAFT_CUDA_TRY(cudaGetDevice(&device_)); -} - template void RaftIvfFlatGpu::build(const T* dataset, size_t nrow, cudaStream_t) { diff --git a/cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h b/cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h index 30bd5ab4d6..dcb42c7c9c 100644 --- a/cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h +++ b/cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h @@ -47,11 +47,24 @@ class RaftIvfPQ : public ANN { struct SearchParam : public AnnSearchParam { raft::neighbors::ivf_pq::search_params pq_param; + float refine_ratio = 1.0f; + auto needs_dataset() const -> bool override { return refine_ratio > 1.0f; } }; using BuildParam = raft::neighbors::ivf_pq::index_params; - RaftIvfPQ(Metric metric, int dim, const BuildParam& param, float refine_ratio); + 
RaftIvfPQ(Metric metric, int dim, const BuildParam& param) + : ANN(metric, dim), + index_params_(param), + dimension_(dim), + mr_(rmm::mr::get_current_device_resource(), 1024 * 1024 * 1024ull) + { + rmm::mr::set_current_device_resource(&mr_); + index_params_.metric = parse_metric_type(metric); + RAFT_CUDA_TRY(cudaGetDevice(&device_)); + } + + ~RaftIvfPQ() noexcept { rmm::mr::set_current_device_resource(mr_.get_upstream()); } void build(const T* dataset, size_t nrow, cudaStream_t stream) final; @@ -71,17 +84,16 @@ class RaftIvfPQ : public ANN { AlgoProperty get_property() const override { AlgoProperty property; - property.dataset_memory_type = MemoryType::Host; - property.query_memory_type = MemoryType::Device; - property.need_dataset_when_search = refine_ratio_ > 1.0; + property.dataset_memory_type = MemoryType::Host; + property.query_memory_type = MemoryType::Device; return property; } void save(const std::string& file) const override; void load(const std::string&) override; - ~RaftIvfPQ() noexcept { rmm::mr::set_current_device_resource(mr_.get_upstream()); } - private: + // `mr_` must go first to make sure it dies last + rmm::mr::pool_memory_resource mr_; raft::device_resources handle_; BuildParam index_params_; raft::neighbors::ivf_pq::search_params search_params_; @@ -89,21 +101,8 @@ class RaftIvfPQ : public ANN { int device_; int dimension_; float refine_ratio_ = 1.0; - rmm::mr::pool_memory_resource mr_; raft::device_matrix_view dataset_; }; -template -RaftIvfPQ::RaftIvfPQ(Metric metric, int dim, const BuildParam& param, float refine_ratio) - : ANN(metric, dim), - index_params_(param), - dimension_(dim), - refine_ratio_(refine_ratio), - mr_(rmm::mr::get_current_device_resource(), 1024 * 1024 * 1024ull) -{ - rmm::mr::set_current_device_resource(&mr_); - index_params_.metric = parse_metric_type(metric); - RAFT_CUDA_TRY(cudaGetDevice(&device_)); -} template void RaftIvfPQ::save(const std::string& file) const @@ -134,6 +133,7 @@ void RaftIvfPQ::set_search_param(const AnnSearchParam& param) { auto search_param = dynamic_cast(param); search_params_ = search_param.pq_param; + refine_ratio_ = search_param.refine_ratio; assert(search_params_.n_probes <= index_params_.n_lists); } diff --git a/docs/source/ann_benchmarks_low_level.md b/docs/source/ann_benchmarks_low_level.md index f95d01f66f..d7cc2a3310 100644 --- a/docs/source/ann_benchmarks_low_level.md +++ b/docs/source/ann_benchmarks_low_level.md @@ -21,126 +21,204 @@ mv glove-100-angular.groundtruth.distances.fbin glove-100-inner/groundtruth.dist popd # (2) build index -./cpp/build/RAFT_IVF_FLAT_ANN_BENCH -b -i raft_ivf_flat.nlist1024 conf/glove-100-inner.json +./cpp/build/RAFT_IVF_FLAT_ANN_BENCH \ + --data_prefix=cpp/bench/ann/data \ + --build \ + --benchmark_filter="raft_ivf_flat\..*" \ + cpp/bench/ann/conf/glove-100-inner.json # (3) search -./cpp/build/RAFT_IVF_FLAT_ANN_BENCH -s -i raft_ivf_flat.nlist1024 conf/glove-100-inner.json - -# (4) evaluate result -pushd -cd cpp/bench/ann -./scripts/eval.pl \ - -o result.csv \ - data/glove-100-inner/groundtruth.neighbors.ibin \ - result/glove-100-inner/faiss_ivf_flat -popd - -# optional step: plot QPS-Recall figure using data in result.csv with your favorite tool +./cpp/build/RAFT_IVF_FLAT_ANN_BENCH \ + --data_prefix=cpp/bench/ann/data \ + --benchmark_min_time=2s \ + --benchmark_out=ivf_flat_search.csv \ + --benchmark_out_format=csv \ + --benchmark_counters_tabular \ + --search \ + --benchmark_filter="raft_ivf_flat\..*" + cpp/bench/ann/conf/glove-100-inner.json + +# optional step: plot QPS-Recall 
figure using data in ivf_flat_search.csv with your favorite tool
 ```
-##### Step 1: Prepare Dataset
-[Instructions](ann_benchmarks_dataset.md)
+##### Step 1: Prepare Dataset
+A dataset usually has 4 binary files containing database vectors, query vectors, ground truth neighbors and their corresponding distances. For example, the Glove-100 dataset has files `base.fbin` (database vectors), `query.fbin` (query vectors), `groundtruth.neighbors.ibin` (ground truth neighbors), and `groundtruth.distances.fbin` (ground truth distances). The first two files are for index building and searching, while the other two are associated with a particular distance and are used for evaluation.
+
+The file suffixes `.fbin`, `.f16bin`, `.ibin`, `.u8bin`, and `.i8bin` denote that the data types of the vectors stored in the file are `float32`, `float16` (a.k.a. `half`), `int`, `uint8`, and `int8`, respectively.
+These binary files are little-endian and the format is: the first 8 bytes are `num_vectors` (`uint32_t`) and `num_dimensions` (`uint32_t`), and the following `num_vectors * num_dimensions * sizeof(type)` bytes are the vectors stored in row-major order.
+
+Some implementations can take `float16` database and query vectors as inputs and will have better performance. Use `script/fbin_to_f16bin.py` to transform a dataset from `float32` to `float16` type.
+
+Commonly used datasets can be downloaded from two websites:
+1. Million-scale datasets can be found at the [Data sets](https://github.com/erikbern/ann-benchmarks#data-sets) section of [`ann-benchmarks`](https://github.com/erikbern/ann-benchmarks).
+
+   However, these datasets are in HDF5 format. Use `cpp/bench/ann/scripts/hdf5_to_fbin.py` to transform the format. A few Python packages are required to run it:
+   ```bash
+   pip3 install numpy h5py
+   ```
+   The usage of this script is:
+   ```bash
+   $ cpp/bench/ann/scripts/hdf5_to_fbin.py
+   usage: scripts/hdf5_to_fbin.py [-n] <input>.hdf5
+     -n: normalize base/query set
+   outputs: <input>.base.fbin
+            <input>.query.fbin
+            <input>.groundtruth.neighbors.ibin
+            <input>.groundtruth.distances.fbin
+   ```
+   So for an input `.hdf5` file, four output binary files will be produced. See the previous section for an example of preprocessing the GloVe dataset.
+
+   Most datasets provided by `ann-benchmarks` use `Angular` or `Euclidean` distance. `Angular` denotes cosine distance. However, computing cosine distance reduces to computing inner product by normalizing vectors beforehand. In practice, we can always do the normalization to decrease computation cost, so it's better to measure the performance of inner product rather than cosine distance. The `-n` option of `hdf5_to_fbin.py` can be used to normalize the dataset.
+
+2. Billion-scale datasets can be found at [`big-ann-benchmarks`](http://big-ann-benchmarks.com). The ground truth file contains both neighbors and distances and thus should be split.
A script is provided for this:
+   ```bash
+   $ cpp/bench/ann/scripts/split_groundtruth.pl
+   usage: script/split_groundtruth.pl input output_prefix
+   ```
+   Take the Deep-1B dataset as an example:
+   ```bash
+   pushd
+   cd cpp/bench/ann
+   mkdir -p data/deep-1B && cd data/deep-1B
+   # download manually "Ground Truth" file of "Yandex DEEP"
+   # suppose the file name is deep_new_groundtruth.public.10K.bin
+   ../../scripts/split_groundtruth.pl deep_new_groundtruth.public.10K.bin groundtruth
+   # two files 'groundtruth.neighbors.ibin' and 'groundtruth.distances.fbin' should be produced
+   popd
+   ```
+   Besides ground truth files for the whole billion-scale datasets, this site also provides ground truth files for the first 10M or 100M vectors of the base sets. This means we can use these billion-scale datasets as million-scale datasets. To facilitate this, an optional parameter `subset_size` for the dataset can be used. See the next step for further explanation.

 ##### Step 2: Build Index
-An index is a data structure to facilitate searching. Different algorithms may use different data structures for their index. We can use `RAFT_IVF_FLAT_ANN_BENCH -b` to build an index and save it to disk.
+An index is a data structure to facilitate searching. Different algorithms may use different data structures for their index. We can use `RAFT_IVF_FLAT_ANN_BENCH --build` to build an index and save it to disk.

 To run a benchmark executable, like `RAFT_IVF_FLAT_ANN_BENCH`, a JSON configuration file is required. Refer to [`cpp/bench/ann/conf/glove-100-inner.json`](../../cpp/cpp/bench/ann/conf/glove-100-inner.json) as an example. Configuration file has 3 sections:
 * `dataset` section specifies the name and files of a dataset, and also the distance in use. Since the `*_ANN_BENCH` programs are for index building and searching, only `base_file` for database vectors and `query_file` for query vectors are needed. Ground truth files are for evaluation thus not needed.
   - To use only a subset of the base dataset, an optional parameter `subset_size` can be specified. It means using only the first `subset_size` vectors of `base_file` as the base dataset.
 * `search_basic_param` section specifies basic parameters for searching:
   - `k` is the "k" in "k-nn", that is, the number of neighbors (or results) we want from the searching.
-  - `run_count` means how many times we run the searching. A single run of searching will search neighbors for all vectors in `test` set. The total time used for a run is recorded, and the final searching time is the smallest one among these runs.
 * `index` section specifies an array of configurations for index building and searching:
   - `build_param` and `search_params` are parameters for building and searching, respectively. `search_params` is an array since we will search with different parameters to get different recall values.
   - `file` is the file name of index. Building will save built index to this file, while searching will load this file.
-  - `search_result_file` is the file name prefix of searching results. Searching will save results to these files, and plotting script will read these files to plot results. Note this is a prefix rather than a whole file name. Suppose its value is `${prefix}`, then the real file names are like `${prefix}.0.{ibin|txt}`, `${prefix}.1.{ibin|txt}`, etc. Each of them corresponds to an item in `search_params` array. That is, for one searching parameter, there will be some corresponding search result files.
  - if `multigpu` is specified, multiple GPUs will be used for index build and search.
  - if `refine_ratio` is specified, refinement, as a post-processing step of search, will be done. It's for algorithms that compress vectors. For example, if `"refine_ratio" : 2` is set, 2`k` results are first computed, then exact distances of them are computed using original uncompressed vectors, and finally top `k` results among them are kept.

-The usage of `*_ANN_BENCH` can be found by running `*_ANN_BENCH -h` on one of the executables:
+The usage of `*_ANN_BENCH` can be found by running `*_ANN_BENCH --help` on one of the executables:
 ```bash
-$ ./cpp/build/*_ANN_BENCH -h
-usage: ./cpp/build/*_ANN_BENCH -b|s [-f] [-i index_names] conf.json
-   -b: build mode, will build index
-   -s: search mode, will search using built index
-      one and only one of -b and -s should be specified
-   -f: force overwriting existing output files
-   -i: by default will build/search all the indices found in conf.json
-      '-i' can be used to select a subset of indices
-      'index_names' is a list of comma-separated index names
-      '*' is allowed as the last character of a name to select all matched indices
-      for example, -i "hnsw1,hnsw2,faiss" or -i "hnsw*,faiss"
-```
-* `-b`: build index.
-* `-s`: do the searching with built index.
-* `-f`: before doing the real task, the program checks that needed input files exist and output files don't exist. If these conditions are not met, it quits so no file would be overwritten accidentally. To ignore existing output files and force overwrite them, use the `-f` option.
-* `-i`: by default, the `-b` flag will build all indices found in the configuration file, and `-s` will search using all the indices. To select a subset of indices to build or search, we can use the `-i` option.
-
-It's easier to describe the usage of `-i` option with an example. Suppose we have a configuration file `a.json`, and it contains:
-```json
-  "index" : [
-    {
-      "name" : "hnsw1",
-      ...
-    },
-    {
-      "name" : "hnsw1",
-      ...
-    },
-    {
-      "name" : "faiss",
-      ...
-    }
-  ]
+$ ./cpp/build/*_ANN_BENCH --help
+benchmark [--benchmark_list_tests={true|false}]
+          [--benchmark_filter=<regex>]
+          [--benchmark_min_time=`<integer>x` OR `<float>s` ]
+          [--benchmark_min_warmup_time=<min_warmup_time>]
+          [--benchmark_repetitions=<num_repetitions>]
+          [--benchmark_enable_random_interleaving={true|false}]
+          [--benchmark_report_aggregates_only={true|false}]
+          [--benchmark_display_aggregates_only={true|false}]
+          [--benchmark_format=<console|json|csv>]
+          [--benchmark_out=<filename>]
+          [--benchmark_out_format=<json|console|csv>]
+          [--benchmark_color={auto|true|false}]
+          [--benchmark_counters_tabular={true|false}]
+          [--benchmark_context=<name>=<value>,...]
+          [--benchmark_time_unit={ns|us|ms|s}]
+          [--v=<verbosity>]
+          [--build|--search]
+          [--overwrite]
+          [--data_prefix=<prefix>]
+          <conf>.json
+
+Note the non-standard benchmark parameters:
+  --build: build mode, will build index
+  --search: search mode, will search using the built index
+            one and only one of --build and --search should be specified
+  --overwrite: force overwriting existing index files
+  --data_prefix=<prefix>: prepend <prefix> to dataset file paths specified in the <conf>.json.
+  --override_kv=<key:value1:value2:...:valueN>: override a build/search key one or more times multiplying the number of configurations; you can use this parameter multiple times to get the Cartesian product of benchmark configs.
 ```
-Then,
-```bash
-# build all indices: hnsw1, hnsw2 and faiss
-./cpp/build/HNSWLIB_ANN_BENCH -b a.json
-
-# build only hnsw1
-./cpp/build/HNSWLIB_ANN_BENCH -b -i hnsw1 a.json
+* `--build`: build index.
+* `--search`: do the searching with built index.
+* `--overwrite`: by default, the building mode skips building an index if it finds that the index already exists. This is useful when adding more configurations to the config; only new indices are built, without the need to specify an elaborate filtering regex. By supplying the `--overwrite` flag, you disable this behavior; all indices are built regardless of whether they are already stored on disk.
+* `--data_prefix`: prepend an arbitrary path to the data file paths. By default, it is equal to `data`. Note, this does not apply to index file paths.
+* `--override_kv`: override a build/search key one or more times multiplying the number of configurations.
-# build hnsw1 and hnsw2
-./cpp/build/HNSWLIB_ANN_BENCH -b -i hnsw1,hnsw2 a.json
-
-# build hnsw1 and hnsw2
-./cpp/build/HNSWLIB_ANN_BENCH -b -i 'hnsw*' a.json
-
-# build faiss
-./cpp/build/FAISS_IVF_FLAT_ANN_BENCH -b -i 'faiss' a.json
-```
-In the last two commands, we use wildcard "`*`" to match both `hnsw1` and `hnsw2`. Note the use of "`*`" is quite limited. It can occur only at the end of a pattern, so both "`*nsw1`" and "`h*sw1`" are interpreted literally and will not match anything. Also note that quotation marks must be used to prevent "`*`" from being interpreted by the shell.
+In addition to these ANN-specific flags, you can use all of the standard google benchmark flags. Some of the useful flags:
+* `--benchmark_filter`: specify a subset of benchmarks to run
+* `--benchmark_out`, `--benchmark_out_format`: store the output to a file
+* `--benchmark_list_tests`: check the available configurations
+* `--benchmark_min_time`: specify the minimum duration or number of iterations per case to improve the accuracy of the benchmarks.
+Refer to the google benchmark [user guide](https://github.com/google/benchmark/blob/main/docs/user_guide.md#command-line) for more information about the command-line usage.

 ##### Step 3: Searching
-Use the `-s` flag on any of the `*_ANN_BENCH` executables. Other options are the same as in step 2.
-
-
-##### Step 4: Evaluating Results
-Use `cpp/bench/ann/scripts/eval.pl` to evaluate benchmark results. The usage is:
-```bash
-$ cpp/bench/ann/scripts/eval.pl
-usage: [-f] [-o output.csv] groundtruth.neighbors.ibin result_paths...
-   result_paths... are paths to the search result files.
-     Can specify multiple paths.
-     For each of them, if it's a directory, all the .txt files found under
-     it recursively will be regarded as inputs.
-
-   -f: force to recompute recall and update it in result file if needed
-   -o: also write result to a csv file
+Use the `--search` flag on any of the `*_ANN_BENCH` executables. Other options are the same as in step 2.
+
+## Adding a new ANN algorithm
+Implementation of a new algorithm should be a class that inherits `class ANN` (defined in `cpp/bench/ann/src/ann.h`) and implements all the pure virtual functions.
+
+In addition, it should define two `struct`s for building and searching parameters. The searching parameter class should inherit `struct ANN::AnnSearchParam`. Take `class HnswLib` as an example; its definition is:
+```c++
+template<typename T>
+class HnswLib : public ANN<T> {
+public:
+  struct BuildParam {
+    int M;
+    int ef_construction;
+    int num_threads;
+  };
+
+  using typename ANN<T>::AnnSearchParam;
+  struct SearchParam : public AnnSearchParam {
+    int ef;
+    int num_threads;
+  };
+
+  // ...
+};
 ```
-Note that there can be multiple arguments for paths of result files. Each argument can be either a file name or a path. If it's a directory, all files found under it recursively will be used as input files.
-An example:
-```bash
-cpp/bench/ann/scripts/eval.pl groundtruth.neighbors.ibin \
-  result/glove-100-angular/10/hnsw/angular_M_24_*.txt \
-  result/glove-100-angular/10/faiss/
+
+The benchmark program uses a JSON configuration file. To add a new algorithm to the benchmark, you need to be able to specify `build_param`, whose value is a JSON object, and `search_params`, whose value is an array of JSON objects, for this algorithm in the configuration file. Still taking the configuration for `HnswLib` as an example:
+```json
+{
+  "name" : "...",
+  "algo" : "hnswlib",
+  "build_param": {"M":12, "efConstruction":500, "numThreads":32},
+  "file" : "/path/to/file",
+  "search_params" : [
+    {"ef":10, "numThreads":1},
+    {"ef":20, "numThreads":1},
+    {"ef":40, "numThreads":1}
+  ]
+},
 ```
-The search result files used by this command are files matching `result/glove-100-angular/10/hnsw/angular_M_24_*.txt`, and all `.txt` files under directory `result/glove-100-angular/10/faiss/` recursively.
-This script prints recall and QPS for every result file. Also, it outputs estimated "recall at QPS=2000" and "QPS at recall=0.9", which can be used to compare performance quantitatively.
+How to interpret these JSON objects is left entirely to the implementation and should be specified in `cpp/bench/ann/src/factory.cuh`:
+1. First, add two functions for parsing the JSON object into `struct BuildParam` and `struct SearchParam`, respectively:
+   ```c++
+   template<typename T>
+   void parse_build_param(const nlohmann::json& conf,
+                          typename cuann::HnswLib<T>::BuildParam& param) {
+     param.ef_construction = conf.at("efConstruction");
+     param.M = conf.at("M");
+     if (conf.contains("numThreads")) {
+       param.num_threads = conf.at("numThreads");
+     }
+   }
-It saves recall value in result txt file, so avoids to recompute recall if the same command is run again. To force to recompute recall, option `-f` can be used. If option `-o ` is specified, a csv output file will be produced. This file can be used to plot Throughput-Recall curves.
+
+   template<typename T>
+   void parse_search_param(const nlohmann::json& conf,
+                           typename cuann::HnswLib<T>::SearchParam& param) {
+     param.ef = conf.at("ef");
+     if (conf.contains("numThreads")) {
+       param.num_threads = conf.at("numThreads");
+     }
+   }
+   ```
+
+2. Next, add a corresponding `if` case to the functions `create_algo()` and `create_search_param()` that calls these parsing functions. The string literal in the `if` condition must be the same as the value of `algo` in the configuration file. For example,
+   ```c++
+   // JSON configuration file contains a line like: "algo" : "hnswlib"
+   if (algo == "hnswlib") {
+     // ...
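+     // A sketch of what typically goes in this branch (assuming the parse helpers
+     // shown above and that `metric`, `dim`, `conf` and an `ann` unique_ptr are
+     // available in create_algo(); the names are illustrative, not an exact API):
+     //   typename cuann::HnswLib<T>::BuildParam param;
+     //   parse_build_param<T>(conf, param);
+     //   ann = std::make_unique<cuann::HnswLib<T>>(metric, dim, param);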
+ } + ``` From 7473c620b9aacb53bd26b6d8827384f536219f84 Mon Sep 17 00:00:00 2001 From: achirkin Date: Wed, 9 Aug 2023 17:25:25 +0200 Subject: [PATCH 02/70] Disable NVTX if the nvtx3 headers are missing --- cpp/bench/ann/CMakeLists.txt | 7 +++++++ cpp/bench/ann/src/common/util.hpp | 14 ++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index 5e31e71b06..2ccdead89a 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -237,6 +237,13 @@ set_target_properties( INSTALL_RPATH "\$ORIGIN" ) +# Disable NVTX when the nvtx3 headers are missing +set(_CMAKE_REQUIRED_INCLUDES_ORIG ${CMAKE_REQUIRED_INCLUDES}) +get_target_property(CMAKE_REQUIRED_INCLUDES ANN_BENCH INCLUDE_DIRECTORIES) +CHECK_INCLUDE_FILE_CXX(nvtx3/nvToolsExt.h NVTX3_HEADERS_FOUND) +set(CMAKE_REQUIRED_INCLUDES ${_CMAKE_REQUIRED_INCLUDES_ORIG}) +target_compile_definitions(ANN_BENCH PRIVATE $<$:ANN_BENCH_NVTX3_HEADERS_FOUND>) + target_link_options(ANN_BENCH PRIVATE -export-dynamic) install( diff --git a/cpp/bench/ann/src/common/util.hpp b/cpp/bench/ann/src/common/util.hpp index 88a9b4bb7a..39eb986b30 100644 --- a/cpp/bench/ann/src/common/util.hpp +++ b/cpp/bench/ann/src/common/util.hpp @@ -18,7 +18,9 @@ #include "ann_types.hpp" #include "cuda_stub.hpp" +#ifdef ANN_BENCH_NVTX3_HEADERS_FOUND #include +#endif #include #include @@ -164,6 +166,7 @@ inline auto cuda_info() } struct nvtx_case { +#ifdef ANN_BENCH_NVTX3_HEADERS_FOUND private: std::string case_name_; std::array iter_name_{0}; @@ -171,9 +174,11 @@ struct nvtx_case { int64_t iteration_ = 0; nvtxEventAttributes_t case_attrib_{0}; nvtxEventAttributes_t iter_attrib_{0}; +#endif public: struct nvtx_lap { +#ifdef ANN_BENCH_NVTX3_HEADERS_FOUND private: nvtxDomainHandle_t domain_; @@ -184,8 +189,10 @@ struct nvtx_case { } nvtx_lap() = delete; ~nvtx_lap() noexcept { nvtxDomainRangePop(domain_); } +#endif }; +#ifdef ANN_BENCH_NVTX3_HEADERS_FOUND explicit nvtx_case(std::string case_name) : case_name_(std::move(case_name)), domain_(nvtxDomainCreateA("ANN benchmark")) { @@ -208,9 +215,13 @@ struct nvtx_case { nvtxDomainRangePop(domain_); nvtxDomainDestroy(domain_); } +#else + explicit nvtx_case(std::string) {} +#endif [[nodiscard]] auto lap() -> nvtx_case::nvtx_lap { +#ifdef ANN_BENCH_NVTX3_HEADERS_FOUND auto i = iteration_++; uint32_t c = (i % 5); uint32_t r = 150 + c * 20; @@ -220,6 +231,9 @@ struct nvtx_case { iter_attrib_.message.ascii = iter_name_.data(); iter_attrib_.color = (r << 16) + (g << 8) + b; return nvtx_lap{domain_, &iter_attrib_}; +#else + return nvtx_lap{}; +#endif } }; From 291788697af6fbe0eabbd7136ee1554809f7072a Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 11 Aug 2023 17:35:28 -0700 Subject: [PATCH 03/70] try to run gbench executable --- bench/ann/algos.yaml | 16 +-- bench/ann/run.py | 173 ++++++++++++++----------- cpp/bench/ann/CMakeLists.txt | 19 ++- cpp/bench/ann/src/common/benchmark.cpp | 15 ++- 4 files changed, 126 insertions(+), 97 deletions(-) diff --git a/bench/ann/algos.yaml b/bench/ann/algos.yaml index 5f554fc46b..46d3c9e801 100644 --- a/bench/ann/algos.yaml +++ b/bench/ann/algos.yaml @@ -1,30 +1,18 @@ faiss_gpu_ivf_flat: - executable: FAISS_IVF_FLAT_ANN_BENCH - disabled: false -faiss_gpu_flat: - executable: FAISS_IVF_FLAT_ANN_BENCH disabled: false faiss_gpu_ivf_pq: - executable: FAISS_IVF_PQ_ANN_BENCH disabled: false faiss_gpu_ivf_sq: - executable: FAISS_IVF_PQ_ANN_BENCH disabled: false -faiss_gpu_bfknn: - executable: FAISS_BFKNN_ANN_BENCH +faiss_gpu_flat: disabled: 
false raft_ivf_flat: - executable: RAFT_IVF_FLAT_ANN_BENCH disabled: false raft_ivf_pq: - executable: RAFT_IVF_PQ_ANN_BENCH disabled: false raft_cagra: - executable: RAFT_CAGRA_ANN_BENCH disabled: false ggnn: - executable: GGNN_ANN_BENCH disabled: false hnswlib: - executable: HNSWLIB_ANN_BENCH - disabled: false \ No newline at end of file + disabled: false diff --git a/bench/ann/run.py b/bench/ann/run.py index ebaef1e004..90175f7433 100644 --- a/bench/ann/run.py +++ b/bench/ann/run.py @@ -25,54 +25,50 @@ def validate_algorithm(algos_conf, algo): return algo in algos_conf_keys and not algos_conf[algo]["disabled"] -def find_executable(algos_conf, algo): - executable = algos_conf[algo]["executable"] +def find_executable(): + executable = "ANN_BENCH" conda_path = os.path.join(os.getenv("CONDA_PREFIX"), "bin", "ann", executable) build_path = os.path.join(os.getenv("RAFT_HOME"), "cpp", "build", executable) if os.path.exists(conda_path): - return (executable, conda_path) + return conda_path elif os.path.exists(build_path): - return (executable, build_path) + return build_path else: raise FileNotFoundError(executable) -def run_build_and_search(conf_filename, conf_file, executables_to_run, +def run_build_and_search(conf_filename, conf_file, dataset_path, force, conf_filedir, build, search): - for executable, ann_executable_path in executables_to_run.keys(): - # Need to write temporary configuration - temp_conf_filename = f"temporary_executable_{conf_filename}" - temp_conf_filepath = os.path.join(conf_filedir, temp_conf_filename) - with open(temp_conf_filepath, "w") as f: - temp_conf = dict() - temp_conf["dataset"] = conf_file["dataset"] - temp_conf["search_basic_param"] = conf_file["search_basic_param"] - temp_conf["index"] = executables_to_run[(executable, - ann_executable_path)]["index"] - json.dump(temp_conf, f) - - if build: - if force: - p = subprocess.Popen([ann_executable_path, "-b", "-f", - temp_conf_filepath]) - p.wait() - else: - p = subprocess.Popen([ann_executable_path, "-b", - temp_conf_filepath]) - p.wait() - - if search: - if force: - p = subprocess.Popen([ann_executable_path, "-s", "-f", - temp_conf_filepath]) - p.wait() - else: - p = subprocess.Popen([ann_executable_path, "-s", - temp_conf_filepath]) - p.wait() - - os.remove(temp_conf_filepath) + ann_executable_path = find_executable() + + # Need to write temporary configuration + temp_conf_filename = f"temporary_{conf_filename}" + temp_conf_filepath = os.path.join(conf_filedir, temp_conf_filename) + with open(temp_conf_filepath, "w") as f: + json.dump(conf_file, f) + + data_prefix = "/".join(dataset_path.split("/")[:-1]) + if build: + cmd = [ann_executable_path, "--build", "--data_prefix="+data_prefix] + if force: + cmd = cmd + ["--overwrite"] + cmd = cmd + [temp_conf_filepath] + print(cmd) + p = subprocess.Popen(cmd) + p.wait() + + if search: + cmd = [ann_executable_path, "--search", "--benchmark_out_format=csv", + "--benchmark_out=" + os.path.join(dataset_path, "result.csv"), + "--data_prefix=" + data_prefix] + if force: + cmd = cmd + ["--overwrite"] + cmd = cmd + [temp_conf_filepath] + p = subprocess.Popen(cmd) + p.wait() + + os.remove(temp_conf_filepath) def main(): @@ -90,7 +86,6 @@ def main(): parser.add_argument( "--dataset", help="dataset whose configuration file will be used", - default="glove-100-inner" ) parser.add_argument( "--dataset-path", @@ -118,6 +113,12 @@ def main(): help="re-run algorithms even if their results \ already exist", action="store_true") + parser.add_argument("--batch-size", + help="batch size for 
querying", + default=1) + parser.add_argument("--k", + help="k neighbors", + default=10) args = parser.parse_args() @@ -133,75 +134,93 @@ def main(): # Read configuration file associated to dataset if args.configuration: conf_filepath = args.configuration + elif args.dataset: + conf_filepath = \ + os.path.join(scripts_path, "conf", f"{args.dataset}.json") else: - conf_filepath = os.path.join(scripts_path, "conf", f"{args.dataset}.json") + raise ValueError("One of parameters `configuration` or \ + `dataset` need to be provided") conf_filename = conf_filepath.split("/")[-1] conf_filedir = "/".join(conf_filepath.split("/")[:-1]) - dataset_name = conf_filename.replace(".json", "") - dataset_path = os.path.join(args.dataset_path, dataset_name) + dataset = conf_filename.replace(".json", "") + dataset_path = os.path.join(args.dataset_path, dataset) if not os.path.exists(conf_filepath): raise FileNotFoundError(conf_filename) + if not os.path.exists(dataset_path): + raise FileNotFoundError(dataset_path) with open(conf_filepath, "r") as f: conf_file = json.load(f) - # Replace base, query to dataset-path - conf_file["dataset"]["base_file"] = os.path.join(dataset_path, "base.fbin") - conf_file["dataset"]["query_file"] = os.path.join(dataset_path, "query.fbin") - # Ensure base and query files exist for dataset - if not os.path.exists(conf_file["dataset"]["base_file"]): - raise FileNotFoundError(conf_file["dataset"]["base_file"]) - if not os.path.exists(conf_file["dataset"]["query_file"]): - raise FileNotFoundError(conf_file["dataset"]["query_file"]) - - executables_to_run = dict() + # # Replace base, query, gr to dataset-path + # conf_file["dataset"]["base_file"] = os.path.join(dataset_path, "base.fbin") + # conf_file["dataset"]["query_file"] = os.path.join(dataset_path, "query.fbin") + # conf_file["dataset"]["groundtruth_neighbors_file"] = \ + # os.path.join(dataset_path, "groundtruth.neighbors.ibin") + # # Ensure base and query files exist for dataset + # if not os.path.exists(conf_file["dataset"]["base_file"]): + # raise FileNotFoundError(conf_file["dataset"]["base_file"]) + # if not os.path.exists(conf_file["dataset"]["query_file"]): + # raise FileNotFoundError(conf_file["dataset"]["query_file"]) + # if not os.path.exists(conf_file["dataset"]["groundtruth_neighbors_file"]): + # raise FileNotFoundError(conf_file["dataset"]["groundtruth_neighbors_file"]) + + # executables_to_run = dict() + indices_to_run = [] # At least one named index should exist in config file if args.indices: indices = set(args.indices.split(",")) # algo associated with index should still be present in algos.yaml # and enabled - for index in conf_file["index"]: + for pos, index in enumerate(conf_file["index"]): curr_algo = index["algo"] if index["name"] in indices and \ validate_algorithm(algos_conf, curr_algo): - executable_path = find_executable(algos_conf, curr_algo) - if executable_path not in executables_to_run: - executables_to_run[executable_path] = {"index": []} - executables_to_run[executable_path]["index"].append(index) + # executable_path = find_executable(algos_conf, curr_algo) + # if executable_path not in executables_to_run: + # executables_to_run[executable_path] = {"index": []} + # executables_to_run[executable_path]["index"].append(index) + indices_to_run.append(pos) # switch to named algorithms if indices parameter is not supplied elif args.algorithms: algorithms = set(args.algorithms.split(",")) # pick out algorithms from conf file that exist # and are enabled in algos.yaml - for index in conf_file["index"]: + 
for pos, index in enumerate(conf_file["index"]): curr_algo = index["algo"] if curr_algo in algorithms and \ validate_algorithm(algos_conf, curr_algo): - executable_path = find_executable(algos_conf, curr_algo) - if executable_path not in executables_to_run: - executables_to_run[executable_path] = {"index": []} - executables_to_run[executable_path]["index"].append(index) + # executable_path = find_executable(algos_conf, curr_algo) + # if executable_path not in executables_to_run: + # executables_to_run[executable_path] = {"index": []} + # executables_to_run[executable_path]["index"].append(index) + indices_to_run.append(pos) # default, try to run all available algorithms else: - for index in conf_file["index"]: + for pos, index in enumerate(conf_file["index"]): curr_algo = index["algo"] if validate_algorithm(algos_conf, curr_algo): - executable_path = find_executable(algos_conf, curr_algo) - if executable_path not in executables_to_run: - executables_to_run[executable_path] = {"index": []} - executables_to_run[executable_path]["index"].append(index) - - # Replace build, search to dataset path - for executable_path in executables_to_run: - for pos, index in enumerate(executables_to_run[executable_path]["index"]): - index["file"] = os.path.join(dataset_path, "index", index["name"]) - index["search_result_file"] = \ - os.path.join(dataset_path, "result", index["name"]) - executables_to_run[executable_path]["index"][pos] = index - - run_build_and_search(conf_filename, conf_file, executables_to_run, + # executable_path = find_executable(algos_conf, curr_algo) + # if executable_path not in executables_to_run: + # executables_to_run[executable_path] = {"index": []} + # executables_to_run[executable_path]["index"].append(index) + indices_to_run.append(pos) + + # filter available indices + if len(indices_to_run) == 0: + raise ValueError("No indices found to run") + conf_file["index"] = [conf_file["index"][i] for i in indices_to_run] + + # Replace index build to dataset path + for pos, index in enumerate(conf_file["index"]): + index["file"] = os.path.join(dataset_path, "index", index["name"]) + conf_file["index"][pos] = index + + print(conf_file) + + run_build_and_search(conf_filename, conf_file, dataset_path, args.force, conf_filedir, build, search) diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index 2ccdead89a..6df4df082f 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -15,9 +15,10 @@ # ################################################################################################## # * compiler function ----------------------------------------------------------------------------- -option(RAFT_ANN_BENCH_USE_FAISS_BFKNN "Include faiss' brute-force knn algorithm in benchmark" ON) option(RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT "Include faiss' ivf flat algorithm in benchmark" ON) option(RAFT_ANN_BENCH_USE_FAISS_IVF_PQ "Include faiss' ivf pq algorithm in benchmark" ON) +option(RAFT_ANN_BENCH_USE_FAISS_IVF_SQ "Include faiss' brute-force knn algorithm in benchmark" ON) +option(RAFT_ANN_BENCH_USE_FAISS_FLAT "Include faiss' brute-force knn algorithm in benchmark" ON) option(RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT "Include raft's ivf flat algorithm in benchmark" ON) option(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ "Include raft's ivf pq algorithm in benchmark" ON) option(RAFT_ANN_BENCH_USE_RAFT_CAGRA "Include raft's CAGRA in benchmark" ON) @@ -183,18 +184,26 @@ endif() if(RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT) ConfigureAnnBench( - NAME FAISS_IVF_FLAT PATH 
bench/ann/src/faiss/faiss_benchmark.cu LINKS faiss::faiss + NAME FAISS_GPU_IVF_FLAT PATH bench/ann/src/faiss/faiss_benchmark.cu LINKS faiss::faiss ) endif() if(RAFT_ANN_BENCH_USE_FAISS_IVF_PQ) ConfigureAnnBench( - NAME FAISS_IVF_PQ PATH bench/ann/src/faiss/faiss_benchmark.cu LINKS faiss::faiss + NAME FAISS_GPU_IVF_PQ PATH bench/ann/src/faiss/faiss_benchmark.cu LINKS faiss::faiss ) endif() -if(RAFT_ANN_BENCH_USE_FAISS_BFKNN) - ConfigureAnnBench(NAME FAISS_BFKNN PATH bench/ann/src/faiss/faiss_benchmark.cu LINKS faiss::faiss) +if(RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT) + ConfigureAnnBench( + NAME FAISS_GPU_IVF_SQ PATH bench/ann/src/faiss/faiss_benchmark.cu LINKS faiss::faiss + ) +endif() + +if(RAFT_ANN_BENCH_USE_FAISS_IVF_PQ) + ConfigureAnnBench( + NAME FAISS_GPU_FLAT PATH bench/ann/src/faiss/faiss_benchmark.cu LINKS faiss::faiss + ) endif() if(RAFT_ANN_BENCH_USE_GGNN) diff --git a/cpp/bench/ann/src/common/benchmark.cpp b/cpp/bench/ann/src/common/benchmark.cpp index c73f2ed22a..cfffc36515 100644 --- a/cpp/bench/ann/src/common/benchmark.cpp +++ b/cpp/bench/ann/src/common/benchmark.cpp @@ -51,7 +51,20 @@ auto load_lib(const std::string& algo) -> void* if (found != libs.end()) { return found->second.handle; } auto lib_name = "lib" + algo + "_ann_bench.so"; - return libs.emplace(algo, lib_name).first->second.handle; + std::string lib_path = ""; + if (std::getenv("CONDA_PREFIX") != nullptr) { + auto conda_path = std::string(std::getenv("CONDA_PREFIX")) + "/bin" + "/ann/"; + if (std::filesystem::exists(conda_path + "ANN_BENCH")) { + lib_path = conda_path; + } + } + if (std::getenv("RAFT_HOME") != nullptr) { + auto build_path = std::string(std::getenv("RAFT_HOME")) + "/cpp" + "/build/"; + if (std::filesystem::exists(build_path + "ANN_BENCH")) { + lib_path = build_path; + } + } + return libs.emplace(algo, lib_path + lib_name).first->second.handle; } auto get_fun_name(void* addr) -> std::string From 49732b166abef8f17c7287bb211e52b6a9b33ff8 Mon Sep 17 00:00:00 2001 From: achirkin Date: Thu, 17 Aug 2023 10:01:31 +0200 Subject: [PATCH 04/70] Allow to compile ANN_BENCH without CUDA --- cpp/bench/ann/CMakeLists.txt | 5 ++++- cpp/bench/ann/src/common/ann_types.hpp | 2 ++ cpp/bench/ann/src/common/benchmark.cpp | 2 ++ cpp/bench/ann/src/common/benchmark.hpp | 3 ++- cpp/bench/ann/src/common/cuda_stub.hpp | 13 +++++++++++- cpp/bench/ann/src/common/util.hpp | 28 ++++++++++++++++++++------ 6 files changed, 44 insertions(+), 9 deletions(-) diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index 2ccdead89a..9761dd74e5 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -242,7 +242,10 @@ set(_CMAKE_REQUIRED_INCLUDES_ORIG ${CMAKE_REQUIRED_INCLUDES}) get_target_property(CMAKE_REQUIRED_INCLUDES ANN_BENCH INCLUDE_DIRECTORIES) CHECK_INCLUDE_FILE_CXX(nvtx3/nvToolsExt.h NVTX3_HEADERS_FOUND) set(CMAKE_REQUIRED_INCLUDES ${_CMAKE_REQUIRED_INCLUDES_ORIG}) -target_compile_definitions(ANN_BENCH PRIVATE $<$:ANN_BENCH_NVTX3_HEADERS_FOUND>) +target_compile_definitions(ANN_BENCH PRIVATE + $<$:ANN_BENCH_LINK_CUDART="libcudart.so.${CUDAToolkit_VERSION_MAJOR}"> + $<$:ANN_BENCH_NVTX3_HEADERS_FOUND> +) target_link_options(ANN_BENCH PRIVATE -export-dynamic) diff --git a/cpp/bench/ann/src/common/ann_types.hpp b/cpp/bench/ann/src/common/ann_types.hpp index 828731c3b3..baa1f3fd2f 100644 --- a/cpp/bench/ann/src/common/ann_types.hpp +++ b/cpp/bench/ann/src/common/ann_types.hpp @@ -22,7 +22,9 @@ #include #include +#ifndef CPU_ONLY #include // cudaStream_t +#endif namespace raft::bench::ann { diff 
--git a/cpp/bench/ann/src/common/benchmark.cpp b/cpp/bench/ann/src/common/benchmark.cpp index c73f2ed22a..b46623edbf 100644 --- a/cpp/bench/ann/src/common/benchmark.cpp +++ b/cpp/bench/ann/src/common/benchmark.cpp @@ -13,6 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#include "cuda_stub.hpp" // must go first + #include "ann_types.hpp" #define JSON_DIAGNOSTICS 1 diff --git a/cpp/bench/ann/src/common/benchmark.hpp b/cpp/bench/ann/src/common/benchmark.hpp index 97f23838a9..3bfe381d3f 100644 --- a/cpp/bench/ann/src/common/benchmark.hpp +++ b/cpp/bench/ann/src/common/benchmark.hpp @@ -15,9 +15,10 @@ */ #pragma once +#include "cuda_stub.hpp" // must go first + #include "ann_types.hpp" #include "conf.hpp" -#include "cuda_stub.hpp" #include "dataset.hpp" #include "util.hpp" diff --git a/cpp/bench/ann/src/common/cuda_stub.hpp b/cpp/bench/ann/src/common/cuda_stub.hpp index 879a99697f..e3f9aa9e84 100644 --- a/cpp/bench/ann/src/common/cuda_stub.hpp +++ b/cpp/bench/ann/src/common/cuda_stub.hpp @@ -15,7 +15,14 @@ */ #pragma once +#ifdef ANN_BENCH_LINK_CUDART #include +#else +#define CPU_ONLY +typedef void* cudaStream_t; +typedef void* cudaEvent_t; +#endif + #include namespace raft::bench::ann { @@ -24,7 +31,9 @@ struct cuda_lib_handle { void* handle{nullptr}; explicit cuda_lib_handle() { - handle = dlopen("libcudart.so", RTLD_NOW | RTLD_GLOBAL | RTLD_DEEPBIND | RTLD_NODELETE); +#ifdef ANN_BENCH_LINK_CUDART + handle = dlopen(ANN_BENCH_LINK_CUDART, RTLD_NOW | RTLD_GLOBAL | RTLD_DEEPBIND | RTLD_NODELETE); +#endif } ~cuda_lib_handle() noexcept { @@ -36,6 +45,7 @@ struct cuda_lib_handle { static inline cuda_lib_handle cudart{}; +#ifndef CPU_ONLY namespace stub { [[gnu::weak, gnu::noinline]] cudaError_t cudaMemcpy(void* dst, @@ -144,5 +154,6 @@ RAFT_DECLARE_CUDART(cudaRuntimeGetVersion); RAFT_DECLARE_CUDART(cudaGetDeviceProperties); #undef RAFT_DECLARE_CUDART +#endif }; // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/common/util.hpp b/cpp/bench/ann/src/common/util.hpp index 39eb986b30..19fed52d77 100644 --- a/cpp/bench/ann/src/common/util.hpp +++ b/cpp/bench/ann/src/common/util.hpp @@ -47,10 +47,12 @@ struct buf { : memory_type(memory_type), size(size), data(nullptr) { switch (memory_type) { +#ifndef CPU_ONLY case MemoryType::Device: { cudaMalloc(reinterpret_cast(&data), size * sizeof(T)); cudaMemset(data, 0, size * sizeof(T)); } break; +#endif default: { data = reinterpret_cast(malloc(size * sizeof(T))); std::memset(data, 0, size * sizeof(T)); @@ -61,9 +63,11 @@ struct buf { { if (data == nullptr) { return; } switch (memory_type) { +#ifndef CPU_ONLY case MemoryType::Device: { cudaFree(data); } break; +#endif default: { free(data); } @@ -73,21 +77,23 @@ struct buf { [[nodiscard]] auto move(MemoryType target_memory_type) -> buf { buf r{target_memory_type, size}; +#ifndef CPU_ONLY if ((memory_type == MemoryType::Device && target_memory_type != MemoryType::Device) || (memory_type != MemoryType::Device && target_memory_type == MemoryType::Device)) { cudaMemcpy(r.data, data, size * sizeof(T), cudaMemcpyDefault); - } else { - std::swap(data, r.data); + return r; } +#endif + std::swap(data, r.data); return r; } }; struct cuda_timer { private: - cudaStream_t stream_; - cudaEvent_t start_; - cudaEvent_t stop_; + cudaStream_t stream_{nullptr}; + cudaEvent_t start_{nullptr}; + cudaEvent_t stop_{nullptr}; double total_time_{0}; public: @@ -102,33 +108,41 @@ struct cuda_timer { cuda_lap(cudaStream_t stream, cudaEvent_t start, cudaEvent_t 
stop, double& total_time) : start_(start), stop_(stop), stream_(stream), total_time_(total_time) { +#ifndef CPU_ONLY cudaStreamSynchronize(stream_); cudaEventRecord(start_, stream_); +#endif } cuda_lap() = delete; ~cuda_lap() noexcept { +#ifndef CPU_ONLY cudaEventRecord(stop_, stream_); cudaEventSynchronize(stop_); float milliseconds = 0.0f; cudaEventElapsedTime(&milliseconds, start_, stop_); total_time_ += milliseconds / 1000.0; +#endif } }; cuda_timer() { +#ifndef CPU_ONLY cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking); cudaEventCreate(&stop_); cudaEventCreate(&start_); +#endif } ~cuda_timer() noexcept { +#ifndef CPU_ONLY cudaEventDestroy(start_); cudaEventDestroy(stop_); cudaStreamDestroy(stream_); +#endif } [[nodiscard]] auto stream() const -> cudaStream_t { return stream_; } @@ -143,6 +157,8 @@ struct cuda_timer { inline auto cuda_info() { + std::vector> props; +#ifndef CPU_ONLY int dev, driver = 0, runtime = 0; cudaDriverGetVersion(&driver); cudaRuntimeGetVersion(&runtime); @@ -150,7 +166,6 @@ inline auto cuda_info() cudaDeviceProp device_prop; cudaGetDevice(&dev); cudaGetDeviceProperties(&device_prop, dev); - std::vector> props; props.emplace_back("gpu_name", std::string(device_prop.name)); props.emplace_back("gpu_sm_count", std::to_string(device_prop.multiProcessorCount)); props.emplace_back("gpu_sm_freq", std::to_string(device_prop.clockRate * 1e3)); @@ -162,6 +177,7 @@ inline auto cuda_info() std::to_string(driver / 1000) + "." + std::to_string((driver % 100) / 10)); props.emplace_back("gpu_runtime_version", std::to_string(runtime / 1000) + "." + std::to_string((runtime % 100) / 10)); +#endif return props; } From 9b588af004359ebb7890205f27ef8bef35b65620 Mon Sep 17 00:00:00 2001 From: achirkin Date: Thu, 17 Aug 2023 11:06:48 +0200 Subject: [PATCH 05/70] Fix style --- cpp/bench/ann/CMakeLists.txt | 8 +++++--- cpp/bench/ann/src/common/benchmark.cpp | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index 9761dd74e5..0acc862fad 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -242,9 +242,11 @@ set(_CMAKE_REQUIRED_INCLUDES_ORIG ${CMAKE_REQUIRED_INCLUDES}) get_target_property(CMAKE_REQUIRED_INCLUDES ANN_BENCH INCLUDE_DIRECTORIES) CHECK_INCLUDE_FILE_CXX(nvtx3/nvToolsExt.h NVTX3_HEADERS_FOUND) set(CMAKE_REQUIRED_INCLUDES ${_CMAKE_REQUIRED_INCLUDES_ORIG}) -target_compile_definitions(ANN_BENCH PRIVATE - $<$:ANN_BENCH_LINK_CUDART="libcudart.so.${CUDAToolkit_VERSION_MAJOR}"> - $<$:ANN_BENCH_NVTX3_HEADERS_FOUND> +target_compile_definitions( + ANN_BENCH + PRIVATE + $<$:ANN_BENCH_LINK_CUDART="libcudart.so.${CUDAToolkit_VERSION_MAJOR}"> + $<$:ANN_BENCH_NVTX3_HEADERS_FOUND> ) target_link_options(ANN_BENCH PRIVATE -export-dynamic) diff --git a/cpp/bench/ann/src/common/benchmark.cpp b/cpp/bench/ann/src/common/benchmark.cpp index b46623edbf..6424a36471 100644 --- a/cpp/bench/ann/src/common/benchmark.cpp +++ b/cpp/bench/ann/src/common/benchmark.cpp @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "cuda_stub.hpp" // must go first +#include "cuda_stub.hpp" // must go first #include "ann_types.hpp" From 6d6c17d6c98733128fc462893f17791afc7c17ab Mon Sep 17 00:00:00 2001 From: achirkin Date: Thu, 17 Aug 2023 13:39:00 +0200 Subject: [PATCH 06/70] Adapt ANN benchmark python scripts --- bench/ann/data_export.py | 41 ++++++++++++++++++++++++++-------------- bench/ann/run.py | 34 +++++++++++++++++---------------- 2 files changed, 45 insertions(+), 30 deletions(-) diff --git a/bench/ann/data_export.py b/bench/ann/data_export.py index df48882840..5cff5d7642 100644 --- a/bench/ann/data_export.py +++ b/bench/ann/data_export.py @@ -16,22 +16,35 @@ import argparse import os import subprocess +import json +from pathlib import Path + +def parse_filepaths(fs): + for p in fs: + if p.endswith(".json") and os.path.exists(p): + yield p + else: + for f in Path(p).rglob('*.json'): + yield f.as_posix() def export_results(output_filepath, recompute, groundtruth_filepath, result_filepaths): print(f"Writing output file to: {output_filepath}") - ann_bench_scripts_dir = os.path.join(os.getenv("RAFT_HOME"), - "cpp/bench/ann/scripts") - ann_bench_scripts_path = os.path.join(ann_bench_scripts_dir, - "eval.pl") - if recompute: - p = subprocess.Popen([ann_bench_scripts_path, "-f", "-o", output_filepath, - groundtruth_filepath] + result_filepaths) - else: - p = subprocess.Popen([ann_bench_scripts_path, "-o", output_filepath, - groundtruth_filepath] + result_filepaths) - p.wait() + + parsed_filepaths = parse_filepaths(result_filepaths) + + with open(output_filepath, 'w') as out: + out.write("Algo,Recall,QPS\n") + + for fp in parsed_filepaths: + with open(fp, 'r') as f: + data = json.load(f) + for benchmark_case in data["benchmarks"]: + algo = benchmark_case["name"] + recall = benchmark_case["Recall"] + qps = benchmark_case["items_per_second"] + out.write(f"{algo},{recall},{qps}\n") def main(): @@ -47,17 +60,17 @@ def main(): parser.add_argument( "--dataset-path", help="path to dataset folder", - default=os.path.join(os.getenv("RAFT_HOME"), + default=os.path.join(os.getenv("RAFT_HOME"), "bench", "ann", "data") ) - + args, result_filepaths = parser.parse_known_args() # if nothing is provided if len(result_filepaths) == 0: raise ValueError("No filepaths to results were provided") - groundtruth_filepath = os.path.join(args.dataset_path, args.dataset, + groundtruth_filepath = os.path.join(args.dataset_path, args.dataset, "groundtruth.neighbors.ibin") export_results(args.output, args.recompute, groundtruth_filepath, result_filepaths) diff --git a/bench/ann/run.py b/bench/ann/run.py index ebaef1e004..e14b437bd5 100644 --- a/bench/ann/run.py +++ b/bench/ann/run.py @@ -27,9 +27,8 @@ def validate_algorithm(algos_conf, algo): def find_executable(algos_conf, algo): executable = algos_conf[algo]["executable"] - conda_path = os.path.join(os.getenv("CONDA_PREFIX"), "bin", "ann", - executable) - build_path = os.path.join(os.getenv("RAFT_HOME"), "cpp", "build", executable) + conda_path = os.path.join(os.getenv("CONDA_PREFIX"), "bin", "ann", "ANN_BENCH") + build_path = os.path.join(os.getenv("RAFT_HOME"), "cpp", "build", "ANN_BENCH") if os.path.exists(conda_path): return (executable, conda_path) elif os.path.exists(build_path): @@ -48,29 +47,31 @@ def run_build_and_search(conf_filename, conf_file, executables_to_run, temp_conf = dict() temp_conf["dataset"] = conf_file["dataset"] temp_conf["search_basic_param"] = conf_file["search_basic_param"] - temp_conf["index"] = executables_to_run[(executable, + temp_conf["index"] = 
executables_to_run[(executable, ann_executable_path)]["index"] json.dump(temp_conf, f) if build: if force: - p = subprocess.Popen([ann_executable_path, "-b", "-f", + p = subprocess.Popen([ann_executable_path, "--build", "--overwrite", temp_conf_filepath]) p.wait() else: - p = subprocess.Popen([ann_executable_path, "-b", + p = subprocess.Popen([ann_executable_path, "--build", temp_conf_filepath]) p.wait() if search: - if force: - p = subprocess.Popen([ann_executable_path, "-s", "-f", - temp_conf_filepath]) - p.wait() - else: - p = subprocess.Popen([ann_executable_path, "-s", - temp_conf_filepath]) - p.wait() + legacy_result_folder = "result/" + temp_conf["dataset"]["name"] + os.makedirs(legacy_result_folder, exist_ok=True) + p = subprocess.Popen([ + ann_executable_path, + "--search", + "--benchmark_counters_tabular", + "--benchmark_out_format=json", + f"--benchmark_out={legacy_result_folder}/{executable}.json", + temp_conf_filepath]) + p.wait() os.remove(temp_conf_filepath) @@ -95,7 +96,7 @@ def main(): parser.add_argument( "--dataset-path", help="path to dataset folder", - default=os.path.join(os.getenv("RAFT_HOME"), + default=os.path.join(os.getenv("RAFT_HOME"), "bench", "ann", "data") ) parser.add_argument( @@ -138,7 +139,7 @@ def main(): conf_filename = conf_filepath.split("/")[-1] conf_filedir = "/".join(conf_filepath.split("/")[:-1]) dataset_name = conf_filename.replace(".json", "") - dataset_path = os.path.join(args.dataset_path, dataset_name) + dataset_path = os.path.realpath(os.path.join(args.dataset_path, dataset_name)) if not os.path.exists(conf_filepath): raise FileNotFoundError(conf_filename) @@ -148,6 +149,7 @@ def main(): # Replace base, query to dataset-path conf_file["dataset"]["base_file"] = os.path.join(dataset_path, "base.fbin") conf_file["dataset"]["query_file"] = os.path.join(dataset_path, "query.fbin") + conf_file["dataset"]["groundtruth_neighbors_file"] = os.path.join(dataset_path, "groundtruth.neighbors.ibin") # Ensure base and query files exist for dataset if not os.path.exists(conf_file["dataset"]["base_file"]): raise FileNotFoundError(conf_file["dataset"]["base_file"]) From b89b27de5415deb33cf5ad9668958305778fcf39 Mon Sep 17 00:00:00 2001 From: achirkin Date: Thu, 17 Aug 2023 16:47:35 +0200 Subject: [PATCH 07/70] Make the default behavior to produce one executable per benchmark --- bench/ann/run.py | 4 +- cpp/bench/ann/CMakeLists.txt | 104 ++++++++++-------- cpp/bench/ann/src/common/benchmark.hpp | 26 +++-- cpp/bench/ann/src/common/util.hpp | 1 - cpp/bench/ann/src/faiss/faiss_benchmark.cu | 5 + cpp/bench/ann/src/ggnn/ggnn_benchmark.cu | 5 + .../ann/src/hnswlib/hnswlib_benchmark.cpp | 6 + cpp/bench/ann/src/raft/raft_benchmark.cu | 5 + 8 files changed, 96 insertions(+), 60 deletions(-) diff --git a/bench/ann/run.py b/bench/ann/run.py index e14b437bd5..f1555e4725 100644 --- a/bench/ann/run.py +++ b/bench/ann/run.py @@ -27,8 +27,8 @@ def validate_algorithm(algos_conf, algo): def find_executable(algos_conf, algo): executable = algos_conf[algo]["executable"] - conda_path = os.path.join(os.getenv("CONDA_PREFIX"), "bin", "ann", "ANN_BENCH") - build_path = os.path.join(os.getenv("RAFT_HOME"), "cpp", "build", "ANN_BENCH") + conda_path = os.path.join(os.getenv("CONDA_PREFIX"), "bin", "ann", executable) + build_path = os.path.join(os.getenv("RAFT_HOME"), "cpp", "build", executable) if os.path.exists(conda_path): return (executable, conda_path) elif os.path.exists(build_path): diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index 
0acc862fad..cb54e97984 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -23,6 +23,7 @@ option(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ "Include raft's ivf pq algorithm in benchm option(RAFT_ANN_BENCH_USE_RAFT_CAGRA "Include raft's CAGRA in benchmark" ON) option(RAFT_ANN_BENCH_USE_HNSWLIB "Include hnsw algorithm in benchmark" ON) option(RAFT_ANN_BENCH_USE_GGNN "Include ggnn algorithm in benchmark" ON) +option(RAFT_ANN_BENCH_SINGLE_EXE "Make a single executable with benchmark as shared library modules" OFF) find_package(Threads REQUIRED) @@ -77,10 +78,16 @@ function(ConfigureAnnBench) set(BENCH_NAME ${ConfigureAnnBench_NAME}_ANN_BENCH) - add_library(${BENCH_NAME} SHARED ${ConfigureAnnBench_PATH}) - - string(TOLOWER ${BENCH_NAME} BENCH_LIB_NAME) - set_target_properties(${BENCH_NAME} PROPERTIES OUTPUT_NAME ${BENCH_LIB_NAME}) + if(RAFT_ANN_BENCH_SINGLE_EXE) + add_library(${BENCH_NAME} SHARED ${ConfigureAnnBench_PATH}) + string(TOLOWER ${BENCH_NAME} BENCH_LIB_NAME) + set_target_properties(${BENCH_NAME} PROPERTIES OUTPUT_NAME ${BENCH_LIB_NAME}) + add_dependencies(${BENCH_NAME} ANN_BENCH) + else() + add_executable(${BENCH_NAME} ${ConfigureAnnBench_PATH}) + target_compile_definitions(${BENCH_NAME} PRIVATE ANN_BENCH_BUILD_MAIN) + target_link_libraries(${BENCH_NAME} PRIVATE benchmark::benchmark) + endif() target_link_libraries( ${BENCH_NAME} @@ -207,53 +214,54 @@ endif() # ################################################################################################## # * Dynamically-loading ANN_BENCH executable ------------------------------------------------------- +if (RAFT_ANN_BENCH_SINGLE_EXE) + add_executable(ANN_BENCH bench/ann/src/common/benchmark.cpp) -add_executable(ANN_BENCH bench/ann/src/common/benchmark.cpp) - -# Build and link static version of the GBench to keep ANN_BENCH self-contained. -get_target_property(TMP_PROP benchmark::benchmark SOURCES) -add_library(benchmark_static STATIC ${TMP_PROP}) -get_target_property(TMP_PROP benchmark::benchmark INCLUDE_DIRECTORIES) -target_include_directories(benchmark_static PUBLIC ${TMP_PROP}) -get_target_property(TMP_PROP benchmark::benchmark LINK_LIBRARIES) -target_link_libraries(benchmark_static PUBLIC ${TMP_PROP}) + # Build and link static version of the GBench to keep ANN_BENCH self-contained. 
+ get_target_property(TMP_PROP benchmark::benchmark SOURCES) + add_library(benchmark_static STATIC ${TMP_PROP}) + get_target_property(TMP_PROP benchmark::benchmark INCLUDE_DIRECTORIES) + target_include_directories(benchmark_static PUBLIC ${TMP_PROP}) + get_target_property(TMP_PROP benchmark::benchmark LINK_LIBRARIES) + target_link_libraries(benchmark_static PUBLIC ${TMP_PROP}) -target_include_directories(ANN_BENCH PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) + target_include_directories(ANN_BENCH PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) -target_link_libraries( - ANN_BENCH PRIVATE nlohmann_json::nlohmann_json benchmark_static dl -static-libgcc - -static-libstdc++ CUDA::nvtx3 -) -set_target_properties( - ANN_BENCH - PROPERTIES # set target compile options - CXX_STANDARD 17 - CXX_STANDARD_REQUIRED ON - CUDA_STANDARD 17 - CUDA_STANDARD_REQUIRED ON - POSITION_INDEPENDENT_CODE ON - INTERFACE_POSITION_INDEPENDENT_CODE ON - BUILD_RPATH "\$ORIGIN" - INSTALL_RPATH "\$ORIGIN" -) + target_link_libraries( + ANN_BENCH PRIVATE nlohmann_json::nlohmann_json benchmark_static dl -static-libgcc + -static-libstdc++ CUDA::nvtx3 + ) + set_target_properties( + ANN_BENCH + PROPERTIES # set target compile options + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON + POSITION_INDEPENDENT_CODE ON + INTERFACE_POSITION_INDEPENDENT_CODE ON + BUILD_RPATH "\$ORIGIN" + INSTALL_RPATH "\$ORIGIN" + ) -# Disable NVTX when the nvtx3 headers are missing -set(_CMAKE_REQUIRED_INCLUDES_ORIG ${CMAKE_REQUIRED_INCLUDES}) -get_target_property(CMAKE_REQUIRED_INCLUDES ANN_BENCH INCLUDE_DIRECTORIES) -CHECK_INCLUDE_FILE_CXX(nvtx3/nvToolsExt.h NVTX3_HEADERS_FOUND) -set(CMAKE_REQUIRED_INCLUDES ${_CMAKE_REQUIRED_INCLUDES_ORIG}) -target_compile_definitions( - ANN_BENCH - PRIVATE - $<$:ANN_BENCH_LINK_CUDART="libcudart.so.${CUDAToolkit_VERSION_MAJOR}"> - $<$:ANN_BENCH_NVTX3_HEADERS_FOUND> -) + # Disable NVTX when the nvtx3 headers are missing + set(_CMAKE_REQUIRED_INCLUDES_ORIG ${CMAKE_REQUIRED_INCLUDES}) + get_target_property(CMAKE_REQUIRED_INCLUDES ANN_BENCH INCLUDE_DIRECTORIES) + CHECK_INCLUDE_FILE_CXX(nvtx3/nvToolsExt.h NVTX3_HEADERS_FOUND) + set(CMAKE_REQUIRED_INCLUDES ${_CMAKE_REQUIRED_INCLUDES_ORIG}) + target_compile_definitions( + ANN_BENCH + PRIVATE + $<$:ANN_BENCH_LINK_CUDART="libcudart.so.${CUDAToolkit_VERSION_MAJOR}"> + $<$:ANN_BENCH_NVTX3_HEADERS_FOUND> + ) -target_link_options(ANN_BENCH PRIVATE -export-dynamic) + target_link_options(ANN_BENCH PRIVATE -export-dynamic) -install( - TARGETS ANN_BENCH - COMPONENT ann_bench - DESTINATION bin/ann - EXCLUDE_FROM_ALL -) + install( + TARGETS ANN_BENCH + COMPONENT ann_bench + DESTINATION bin/ann + EXCLUDE_FROM_ALL + ) +endif() diff --git a/cpp/bench/ann/src/common/benchmark.hpp b/cpp/bench/ann/src/common/benchmark.hpp index 3bfe381d3f..814d7e1760 100644 --- a/cpp/bench/ann/src/common/benchmark.hpp +++ b/cpp/bench/ann/src/common/benchmark.hpp @@ -15,8 +15,6 @@ */ #pragma once -#include "cuda_stub.hpp" // must go first - #include "ann_types.hpp" #include "conf.hpp" #include "dataset.hpp" @@ -35,6 +33,16 @@ #include #include +#ifdef ANN_BENCH_BUILD_MAIN +#ifdef CPU_ONLY +#define CUDART_FOUND false +#else +#define CUDART_FOUND true +#endif +#else +#define CUDART_FOUND (cudart.found()) +#endif + namespace raft::bench::ann { static inline std::unique_ptr current_algo{nullptr}; @@ -133,8 +141,8 @@ void bench_build(::benchmark::State& state, { nvtx_case nvtx{state.name()}; for (auto _ : state) { - auto ntx_lap = nvtx.lap(); - auto gpu_lap = 
gpu_timer.lap(); + [[maybe_unused]] auto ntx_lap = nvtx.lap(); + [[maybe_unused]] auto gpu_lap = gpu_timer.lap(); try { algo->build(base_set, index_size, gpu_timer.stream()); } catch (const std::exception& e) { @@ -215,8 +223,8 @@ void bench_search(::benchmark::State& state, nvtx_case nvtx{state.name()}; for (auto _ : state) { // measure the GPU time using the RAII helper - auto ntx_lap = nvtx.lap(); - auto gpu_lap = gpu_timer.lap(); + [[maybe_unused]] auto ntx_lap = nvtx.lap(); + [[maybe_unused]] auto gpu_lap = gpu_timer.lap(); // run the search try { algo->search(query_set + batch_offset * dataset->dim(), @@ -235,7 +243,7 @@ void bench_search(::benchmark::State& state, } state.SetItemsProcessed(queries_processed); state.counters.insert({{"k", k}, {"n_queries", n_queries}}); - if (cudart.found()) { + if (CUDART_FOUND) { state.counters.insert({{"GPU Time", gpu_timer.total_time() / state.iterations()}, {"GPU QPS", queries_processed / gpu_timer.total_time()}}); } @@ -338,7 +346,7 @@ void dispatch_benchmark(const Configuration& conf, std::string index_prefix, kv_series override_kv) { - if (cudart.found()) { + if (CUDART_FOUND) { for (auto [key, value] : cuda_info()) { ::benchmark::AddCustomContext(key, value); } @@ -487,7 +495,7 @@ inline auto run_main(int argc, char** argv) -> int return -1; } - if (!cudart.found()) { log_warn("cudart library is not found, GPU-based indices won't work."); } + if (!CUDART_FOUND) { log_warn("cudart library is not found, GPU-based indices won't work."); } Configuration conf(conf_stream); std::string dtype = conf.get_dataset_conf().dtype; diff --git a/cpp/bench/ann/src/common/util.hpp b/cpp/bench/ann/src/common/util.hpp index 19fed52d77..faf440071d 100644 --- a/cpp/bench/ann/src/common/util.hpp +++ b/cpp/bench/ann/src/common/util.hpp @@ -17,7 +17,6 @@ #include "ann_types.hpp" -#include "cuda_stub.hpp" #ifdef ANN_BENCH_NVTX3_HEADERS_FOUND #include #endif diff --git a/cpp/bench/ann/src/faiss/faiss_benchmark.cu b/cpp/bench/ann/src/faiss/faiss_benchmark.cu index 2733a9419c..619565d107 100644 --- a/cpp/bench/ann/src/faiss/faiss_benchmark.cu +++ b/cpp/bench/ann/src/faiss/faiss_benchmark.cu @@ -146,3 +146,8 @@ std::unique_ptr::AnnSearchParam> create_search REGISTER_ALGO_INSTANCE(float); REGISTER_ALGO_INSTANCE(std::int8_t); REGISTER_ALGO_INSTANCE(std::uint8_t); + +#ifdef ANN_BENCH_BUILD_MAIN +#include "../common/benchmark.hpp" +int main(int argc, char** argv) { return raft::bench::ann::run_main(argc, argv); } +#endif diff --git a/cpp/bench/ann/src/ggnn/ggnn_benchmark.cu b/cpp/bench/ann/src/ggnn/ggnn_benchmark.cu index 636bf753b1..99481c2921 100644 --- a/cpp/bench/ann/src/ggnn/ggnn_benchmark.cu +++ b/cpp/bench/ann/src/ggnn/ggnn_benchmark.cu @@ -121,3 +121,8 @@ std::unique_ptr::AnnSearchParam> create_search REGISTER_ALGO_INSTANCE(float); REGISTER_ALGO_INSTANCE(std::int8_t); REGISTER_ALGO_INSTANCE(std::uint8_t); + +#ifdef ANN_BENCH_BUILD_MAIN +#include "../common/benchmark.hpp" +int main(int argc, char** argv) { return raft::bench::ann::run_main(argc, argv); } +#endif diff --git a/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp b/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp index bb93a83117..be5b72c5f6 100644 --- a/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp +++ b/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp @@ -114,3 +114,9 @@ std::unique_ptr::AnnSearchParam> create_search REGISTER_ALGO_INSTANCE(float); REGISTER_ALGO_INSTANCE(std::int8_t); REGISTER_ALGO_INSTANCE(std::uint8_t); + +#ifdef ANN_BENCH_BUILD_MAIN +#define CPU_ONLY +#include "../common/benchmark.hpp" 
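+// ANN_BENCH_BUILD_MAIN is defined by CMake when this algorithm is built as a standalone
+// executable; without it, this translation unit becomes a lib<algo>_ann_bench.so plugin
+// that the common ANN_BENCH runner loads at run time, so no main() is emitted here.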
+int main(int argc, char** argv) { return raft::bench::ann::run_main(argc, argv); } +#endif diff --git a/cpp/bench/ann/src/raft/raft_benchmark.cu b/cpp/bench/ann/src/raft/raft_benchmark.cu index 3245e0e064..823fa3f2f3 100644 --- a/cpp/bench/ann/src/raft/raft_benchmark.cu +++ b/cpp/bench/ann/src/raft/raft_benchmark.cu @@ -253,3 +253,8 @@ std::unique_ptr::AnnSearchParam> create_search REGISTER_ALGO_INSTANCE(float); REGISTER_ALGO_INSTANCE(std::int8_t); REGISTER_ALGO_INSTANCE(std::uint8_t); + +#ifdef ANN_BENCH_BUILD_MAIN +#include "../common/benchmark.hpp" +int main(int argc, char** argv) { return raft::bench::ann::run_main(argc, argv); } +#endif From 163a40c4e7945492a276cce9cb0ccec1ad5fbb5e Mon Sep 17 00:00:00 2001 From: achirkin Date: Thu, 17 Aug 2023 17:02:48 +0200 Subject: [PATCH 08/70] Fix style problems / pre-commit --- bench/ann/run.py | 3 ++- cpp/bench/ann/CMakeLists.txt | 22 ++++++++++++---------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/bench/ann/run.py b/bench/ann/run.py index f1555e4725..47f4d382d4 100644 --- a/bench/ann/run.py +++ b/bench/ann/run.py @@ -27,7 +27,8 @@ def validate_algorithm(algos_conf, algo): def find_executable(algos_conf, algo): executable = algos_conf[algo]["executable"] - conda_path = os.path.join(os.getenv("CONDA_PREFIX"), "bin", "ann", executable) + conda_path = os.path.join(os.getenv("CONDA_PREFIX"), "bin", "ann", + executable) build_path = os.path.join(os.getenv("RAFT_HOME"), "cpp", "build", executable) if os.path.exists(conda_path): return (executable, conda_path) diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index cb54e97984..119a5c0a73 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -23,7 +23,9 @@ option(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ "Include raft's ivf pq algorithm in benchm option(RAFT_ANN_BENCH_USE_RAFT_CAGRA "Include raft's CAGRA in benchmark" ON) option(RAFT_ANN_BENCH_USE_HNSWLIB "Include hnsw algorithm in benchmark" ON) option(RAFT_ANN_BENCH_USE_GGNN "Include ggnn algorithm in benchmark" ON) -option(RAFT_ANN_BENCH_SINGLE_EXE "Make a single executable with benchmark as shared library modules" OFF) +option(RAFT_ANN_BENCH_SINGLE_EXE + "Make a single executable with benchmark as shared library modules" OFF +) find_package(Threads REQUIRED) @@ -214,7 +216,7 @@ endif() # ################################################################################################## # * Dynamically-loading ANN_BENCH executable ------------------------------------------------------- -if (RAFT_ANN_BENCH_SINGLE_EXE) +if(RAFT_ANN_BENCH_SINGLE_EXE) add_executable(ANN_BENCH bench/ann/src/common/benchmark.cpp) # Build and link static version of the GBench to keep ANN_BENCH self-contained. @@ -234,14 +236,14 @@ if (RAFT_ANN_BENCH_SINGLE_EXE) set_target_properties( ANN_BENCH PROPERTIES # set target compile options - CXX_STANDARD 17 - CXX_STANDARD_REQUIRED ON - CUDA_STANDARD 17 - CUDA_STANDARD_REQUIRED ON - POSITION_INDEPENDENT_CODE ON - INTERFACE_POSITION_INDEPENDENT_CODE ON - BUILD_RPATH "\$ORIGIN" - INSTALL_RPATH "\$ORIGIN" + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON + POSITION_INDEPENDENT_CODE ON + INTERFACE_POSITION_INDEPENDENT_CODE ON + BUILD_RPATH "\$ORIGIN" + INSTALL_RPATH "\$ORIGIN" ) # Disable NVTX when the nvtx3 headers are missing From 1daf2bfd9874f4509cb29bf7dc8e2fba935520d4 Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Thu, 24 Aug 2023 15:46:55 -0400 Subject: [PATCH 09/70] Adding k and batch-size options to run.py --- .gitignore | 1 + bench/ann/run.py | 26 ++++++++++- docs/source/ann_benchmarks_low_level.md | 1 - docs/source/raft_ann_benchmarks.md | 58 +++++++++++++++++++++++-- 4 files changed, 79 insertions(+), 7 deletions(-) diff --git a/.gitignore b/.gitignore index c2528d2cd0..7eb29cbcb7 100644 --- a/.gitignore +++ b/.gitignore @@ -25,6 +25,7 @@ log dask-worker-space/ *.egg-info/ *.bin +bench/ann/data ## scikit-build _skbuild diff --git a/bench/ann/run.py b/bench/ann/run.py index 47f4d382d4..60b9a012ad 100644 --- a/bench/ann/run.py +++ b/bench/ann/run.py @@ -20,6 +20,16 @@ import yaml +def positive_int(input_str: str) -> int: + try: + i = int(input_str) + if i < 1: + raise ValueError + except ValueError: + raise argparse.ArgumentTypeError(f"{input_str} is not a positive integer") + + return i + def validate_algorithm(algos_conf, algo): algos_conf_keys = set(algos_conf.keys()) return algo in algos_conf_keys and not algos_conf[algo]["disabled"] @@ -39,7 +49,7 @@ def find_executable(algos_conf, algo): def run_build_and_search(conf_filename, conf_file, executables_to_run, - force, conf_filedir, build, search): + force, conf_filedir, build, search, k, batch_size): for executable, ann_executable_path in executables_to_run.keys(): # Need to write temporary configuration temp_conf_filename = f"temporary_executable_{conf_filename}" @@ -70,6 +80,8 @@ def run_build_and_search(conf_filename, conf_file, executables_to_run, "--search", "--benchmark_counters_tabular", "--benchmark_out_format=json", + "--override_kv=k:%s" % k, + "--override_kv=n_queries:%s" % batch_size, f"--benchmark_out={legacy_result_folder}/{executable}.json", temp_conf_filepath]) p.wait() @@ -85,6 +97,13 @@ def main(): parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter) + + parser.add_argument( + "-k", "--count", default=10, type=positive_int, help="the number of nearest neighbors to search for" + ) + parser.add_argument( + "-bs", "--batch-size", default=10000, type=positive_int, help="number of query vectors to use in each query trial" + ) parser.add_argument( "--configuration", help="path to configuration file for a dataset", @@ -132,6 +151,9 @@ def main(): build = args.build search = args.search + k = args.count + batch_size = args.batch_size + # Read configuration file associated to dataset if args.configuration: conf_filepath = args.configuration @@ -205,7 +227,7 @@ def main(): executables_to_run[executable_path]["index"][pos] = index run_build_and_search(conf_filename, conf_file, executables_to_run, - args.force, conf_filedir, build, search) + args.force, conf_filedir, build, search, k, batch_size) if __name__ == "__main__": diff --git a/docs/source/ann_benchmarks_low_level.md b/docs/source/ann_benchmarks_low_level.md index d7cc2a3310..d08a3a1791 100644 --- a/docs/source/ann_benchmarks_low_level.md +++ b/docs/source/ann_benchmarks_low_level.md @@ -100,7 +100,6 @@ To run a benchmark executable, like `RAFT_IVF_FLAT_ANN_BENCH`, a JSON configurat * `index` section specifies an array of configurations for index building and searching: - `build_param` and `search_params` are parameters for building and searching, respectively. `search_params` is an array since we will search with different parameters to get different recall values. - `file` is the file name of index. Building will save built index to this file, while searching will load this file. 
- - if `multigpu` is specified, multiple GPUs will be used for index build and search. - if `refine_ratio` is specified, refinement, as a post-processing step of search, will be done. It's for algorithms that compress vectors. For example, if `"refine_ratio" : 2` is set, 2`k` results are first computed, then exact distances of them are computed using original uncompressed vectors, and finally top `k` results among them are kept. diff --git a/docs/source/raft_ann_benchmarks.md b/docs/source/raft_ann_benchmarks.md index 72e1dfa36a..b1c8813ead 100644 --- a/docs/source/raft_ann_benchmarks.md +++ b/docs/source/raft_ann_benchmarks.md @@ -219,7 +219,58 @@ The figure below is the resulting plot of running our benchmarks as of August 20 ![Throughput vs recall plot comparing popular ANN algorithms with RAFT's at batch size 10](../../img/raft-vector-search-batch-10.png) +## Creating and customizing dataset configurations + +A single configuration file will often define a set of algorithms, with associated index and search parameters, for a specific dataset. A configuration file uses json format with 4 major parts: +1. Dataset information +2. Algorithm information +3. Index parameters +4. Search parameters + +Below is a simple example configuration file for the 1M-scale `sift-128-euclidean` dataset: + +```json +{ + "dataset": { + "name": "sift-128-euclidean", + "base_file": "sift-128-euclidean/base.fbin", + "query_file": "sift-128-euclidean/query.fbin", + "subset_size": 1000000, + "groundtruth_neighbors_file": "sift-128-euclidean/groundtruth.neighbors.ibin", + "distance": "euclidean" + }, + "index": [] +} +``` + +The `index` section will contain a list of index objects, each of which will have the following form: +```json +{ + "name": "algo_name.unique_index_name", + "algo": "algo_name", + "file": "sift-128-euclidean/algo_name/param1_val1-param2_val2", + "build_param": { "param1": "val1", "param2": "val2" }, + "search_params": { "search_param1": "search_val1" } +} +``` + +The table below contains the possible settings for the `algo` field. Each unique algorithm will have its own set of `build_param` and `search_params` settings. The [ANN Algorithm Parameter Tuning Guide]() contains detailed instructions on choosing build and search parameters for each supported algorithm. + +| Library | Algorithms | +|-----------|----------------------------------------------| +| FAISS | `faiss_gpu_ivf_flat`, `faiss_gpu_ivf_pq` | +| GGNN | `ggnn` | +| HNSWlib | `hnswlib` | +| RAFT | `raft_cagra`, `raft_ivf_flat`, `raft_ivf_pq` | + + + + +By default, the index will be placed in `bench/ann/data//index/`. Using `sift-128-euclidean` for the dataset with the `algo` example above, the indexes would be placed in `bench/ann/data/sift-128-euclidean/index/algo_name/param1_val1-param2_val2`. + + ## Adding a new ANN algorithm + ### Implementation and Configuration Implementation of a new algorithm should be a C++ class that inherits `class ANN` (defined in `cpp/bench/ann/src/ann.h`) and implements all the pure virtual functions. @@ -244,10 +295,10 @@ public: }; ``` -The benchmark program uses JSON configuration file. To add the new algorithm to the benchmark, need be able to specify `build_param`, whose value is a JSON object, and `search_params`, whose value is an array of JSON objects, for this algorithm in configuration file. Still take the configuration for `HnswLib` as an example: +The benchmark program uses JSON format in a configuration file to specify indexes to build, along with the build and search parameters. 
To add the new algorithm to the benchmark, you need to be able to specify `build_param`, whose value is a JSON object, and `search_params`, whose value is an array of JSON objects, for this algorithm in the configuration file. The `build_param` and `search_params` arguments will vary depending on the algorithm. Take the configuration for `HnswLib` as an example:
 ```json
 {
-  "name" : "...",
+  "name" : "hnswlib.M12.ef500.th32",
   "algo" : "hnswlib",
   "build_param": {"M":12, "efConstruction":500, "numThreads":32},
   "file" : "/path/to/file",
@@ -259,7 +310,6 @@ public:
   "search_result_file" : "/path/to/file"
 },
 ```
-
 How to interpret these JSON objects is totally left to the implementation and should be specified in `cpp/bench/ann/src/factory.cuh`:
 1. First, add two functions for parsing JSON object to `struct BuildParam` and `struct SearchParam`, respectively:
 ```c++
@@ -283,7 +333,7 @@ How to interpret these JSON objects is totally left to the implementation and sh
 }
 ```
-2. Next, add corresponding `if` case to functions `create_algo()` and `create_search_param()` by calling parsing functions. The string literal in `if` condition statement must be the same as the value of `algo` in configuration file. For example,
+2. Next, add a corresponding `if` case to the functions `create_algo()` (in `bench/ann/`) and `create_search_param()` by calling the parsing functions. The string literal in the `if` condition must be the same as the value of `algo` in the configuration file. For example,
 ```c++
 // JSON configuration file contains a line like: "algo" : "hnswlib"
 if (algo == "hnswlib") {

From 04893c983a4424c41e6f168b2d2d7ab0b3ec9983 Mon Sep 17 00:00:00 2001
From: achirkin
Date: Thu, 24 Aug 2023 22:51:24 +0200
Subject: [PATCH 10/70] Add dataset_memory_type/query_memory_type as build/search parameters

---
 cpp/bench/ann/src/common/ann_types.hpp       | 15 ++++++++++-
 cpp/bench/ann/src/common/benchmark.hpp       | 15 +++++++++--
 cpp/bench/ann/src/faiss/faiss_wrapper.h      |  2 +-
 cpp/bench/ann/src/ggnn/ggnn_wrapper.cuh      |  4 +--
 cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h  |  2 +-
 cpp/bench/ann/src/raft/raft_cagra_wrapper.h  | 25 +++++++++++--------
 .../ann/src/raft/raft_ivf_flat_wrapper.h     |  2 +-
 cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h |  6 +++--
 cpp/bench/ann/src/raft/raft_wrapper.h        |  7 +++---
 9 files changed, 54 insertions(+), 24 deletions(-)

diff --git a/cpp/bench/ann/src/common/ann_types.hpp b/cpp/bench/ann/src/common/ann_types.hpp
index baa1f3fd2f..e0c22d1798 100644
--- a/cpp/bench/ann/src/common/ann_types.hpp
+++ b/cpp/bench/ann/src/common/ann_types.hpp
@@ -50,6 +50,19 @@ inline auto parse_metric(const std::string& metric_str) -> Metric
   }
 }
 
+inline auto parse_memory_type(const std::string& memory_type) -> MemoryType
+{
+  if (memory_type == "host") {
+    return MemoryType::Host;
+  } else if (memory_type == "mmap") {
+    return MemoryType::HostMmap;
+  } else if (memory_type == "device") {
+    return MemoryType::Device;
+  } else {
+    throw std::runtime_error("invalid memory type: '" + memory_type + "'");
+  }
+}
+
 struct AlgoProperty {
   MemoryType dataset_memory_type;
   // neighbors/distances should have same memory type as queries
@@ -91,7 +104,7 @@ class ANN : public AnnBase {
   virtual void save(const std::string& file) const = 0;
   virtual void load(const std::string& file) = 0;
 
-  virtual AlgoProperty get_property() const = 0;
+  virtual AlgoProperty get_preference() const = 0;
 
   // Some algorithms don't save the building dataset in their indices.
   // So they should be given the access to that dataset during searching.
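The new `dataset_memory_type` and `query_memory_type` keys are plain strings in the JSON configuration; `parse_memory_type` above maps them onto `MemoryType`, and `parse_algo_property` in `benchmark.hpp` (next file) lays them over each algorithm's `get_preference()` defaults. Below is a minimal, self-contained sketch of that mapping — the `MemoryType` enum and the sample `build_param` are local stand-ins for illustration, not the benchmark's own headers or a real dataset configuration:

```c++
// Minimal sketch (local stand-ins, not the benchmark sources): mapping a
// "dataset_memory_type" override from a build_param JSON object onto an enum,
// mirroring parse_memory_type above.
#include <nlohmann/json.hpp>

#include <iostream>
#include <stdexcept>
#include <string>

enum class MemoryType { Host, HostMmap, Device };

auto parse_memory_type(const std::string& memory_type) -> MemoryType
{
  if (memory_type == "host") { return MemoryType::Host; }
  if (memory_type == "mmap") { return MemoryType::HostMmap; }
  if (memory_type == "device") { return MemoryType::Device; }
  throw std::runtime_error("invalid memory type: '" + memory_type + "'");
}

int main()
{
  // Hypothetical build_param entry; only the *_memory_type key matters here.
  auto build_param = nlohmann::json::parse(R"({
    "nlist": 1024,
    "dataset_memory_type": "mmap"
  })");

  // Start from the algorithm's preferred placement and override it when the key is present.
  auto dataset_memory_type = MemoryType::Device;
  if (build_param.contains("dataset_memory_type")) {
    dataset_memory_type =
      parse_memory_type(build_param.at("dataset_memory_type").get<std::string>());
  }
  std::cout << "dataset placed in mmap-ed host memory: " << std::boolalpha
            << (dataset_memory_type == MemoryType::HostMmap) << std::endl;
  return 0;
}
```

For instance, `RaftCagra::get_preference()` later in this patch defaults the dataset to `MemoryType::HostMmap`, and this override lets a configuration pin it to `host` or `device` per index entry instead.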
diff --git a/cpp/bench/ann/src/common/benchmark.hpp b/cpp/bench/ann/src/common/benchmark.hpp index 814d7e1760..6a8c6b933e 100644 --- a/cpp/bench/ann/src/common/benchmark.hpp +++ b/cpp/bench/ann/src/common/benchmark.hpp @@ -109,6 +109,17 @@ inline void dump_parameters(::benchmark::State& state, nlohmann::json params) if (!label_empty) { state.SetLabel(label); } } +inline auto parse_algo_property(AlgoProperty prop, const nlohmann::json& conf) -> AlgoProperty +{ + if (conf.contains("dataset_memory_type")) { + prop.dataset_memory_type = parse_memory_type(conf.at("dataset_memory_type")); + } + if (conf.contains("query_memory_type")) { + prop.query_memory_type = parse_memory_type(conf.at("query_memory_type")); + } + return prop; +}; + template void bench_build(::benchmark::State& state, std::shared_ptr> dataset, @@ -132,7 +143,7 @@ void bench_build(::benchmark::State& state, return state.SkipWithError("Failed to create an algo: " + std::string(e.what())); } - const auto algo_property = algo->get_property(); + const auto algo_property = parse_algo_property(algo->get_preference(), index.build_param); const T* base_set = dataset->base_set(algo_property.dataset_memory_type); std::size_t index_size = dataset->base_set_size(); @@ -200,7 +211,7 @@ void bench_search(::benchmark::State& state, } algo->set_search_param(*search_param); - const auto algo_property = algo->get_property(); + const auto algo_property = parse_algo_property(algo->get_preference(), sp_json); const T* query_set = dataset->query_set(algo_property.query_memory_type); buf distances{algo_property.query_memory_type, k * query_set_size}; buf neighbors{algo_property.query_memory_type, k * query_set_size}; diff --git a/cpp/bench/ann/src/faiss/faiss_wrapper.h b/cpp/bench/ann/src/faiss/faiss_wrapper.h index 6c367ba522..0dc06f99a5 100644 --- a/cpp/bench/ann/src/faiss/faiss_wrapper.h +++ b/cpp/bench/ann/src/faiss/faiss_wrapper.h @@ -98,7 +98,7 @@ class FaissGpu : public ANN { float* distances, cudaStream_t stream = 0) const final; - AlgoProperty get_property() const override + AlgoProperty get_preference() const override { AlgoProperty property; // to enable building big dataset which is larger than GPU memory diff --git a/cpp/bench/ann/src/ggnn/ggnn_wrapper.cuh b/cpp/bench/ann/src/ggnn/ggnn_wrapper.cuh index 44986980fe..74c7cddc3c 100644 --- a/cpp/bench/ann/src/ggnn/ggnn_wrapper.cuh +++ b/cpp/bench/ann/src/ggnn/ggnn_wrapper.cuh @@ -75,7 +75,7 @@ class Ggnn : public ANN { void save(const std::string& file) const override { impl_->save(file); } void load(const std::string& file) override { impl_->load(file); } - AlgoProperty get_property() const override { return impl_->get_property(); } + AlgoProperty get_preference() const override { return impl_->get_preference(); } void set_search_dataset(const T* dataset, size_t nrow) override { @@ -136,7 +136,7 @@ class GgnnImpl : public ANN { void save(const std::string& file) const override; void load(const std::string& file) override; - AlgoProperty get_property() const override + AlgoProperty get_preference() const override { AlgoProperty property; property.dataset_memory_type = MemoryType::Device; diff --git a/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h b/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h index 2a7a984a8c..5cd33ef94d 100644 --- a/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h +++ b/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h @@ -188,7 +188,7 @@ class HnswLib : public ANN { void save(const std::string& path_to_index) const override; void load(const std::string& path_to_index) override; - 
AlgoProperty get_property() const override + AlgoProperty get_preference() const override { AlgoProperty property; property.dataset_memory_type = MemoryType::Host; diff --git a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h index 4e3d3a7a58..9e9534de36 100644 --- a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h +++ b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -80,7 +81,7 @@ class RaftCagra : public ANN { cudaStream_t stream = 0) const override; // to enable dataset access from GPU memory - AlgoProperty get_property() const override + AlgoProperty get_preference() const override { AlgoProperty property; property.dataset_memory_type = MemoryType::HostMmap; @@ -104,16 +105,20 @@ class RaftCagra : public ANN { template void RaftCagra::build(const T* dataset, size_t nrow, cudaStream_t) { - if (get_property().dataset_memory_type != MemoryType::Device) { - auto dataset_view = - raft::make_host_matrix_view(dataset, IdxT(nrow), dimension_); - index_.emplace(raft::neighbors::cagra::build(handle_, index_params_, dataset_view)); - } else { - auto dataset_view = - raft::make_device_matrix_view(dataset, IdxT(nrow), dimension_); - index_.emplace(raft::neighbors::cagra::build(handle_, index_params_, dataset_view)); + switch (raft::spatial::knn::detail::utils::check_pointer_residency(dataset)) { + case raft::spatial::knn::detail::utils::pointer_residency::host_only: { + auto dataset_view = + raft::make_host_matrix_view(dataset, IdxT(nrow), dimension_); + index_.emplace(raft::neighbors::cagra::build(handle_, index_params_, dataset_view)); + return; + } + default: { + auto dataset_view = + raft::make_device_matrix_view(dataset, IdxT(nrow), dimension_); + index_.emplace(raft::neighbors::cagra::build(handle_, index_params_, dataset_view)); + return; + } } - return; } template diff --git a/cpp/bench/ann/src/raft/raft_ivf_flat_wrapper.h b/cpp/bench/ann/src/raft/raft_ivf_flat_wrapper.h index f249eb0395..da457e32f1 100644 --- a/cpp/bench/ann/src/raft/raft_ivf_flat_wrapper.h +++ b/cpp/bench/ann/src/raft/raft_ivf_flat_wrapper.h @@ -80,7 +80,7 @@ class RaftIvfFlatGpu : public ANN { cudaStream_t stream = 0) const override; // to enable dataset access from GPU memory - AlgoProperty get_property() const override + AlgoProperty get_preference() const override { AlgoProperty property; property.dataset_memory_type = MemoryType::Device; diff --git a/cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h b/cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h index dcb42c7c9c..0d4bca75cc 100644 --- a/cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h +++ b/cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -81,7 +82,7 @@ class RaftIvfPQ : public ANN { cudaStream_t stream = 0) const override; // to enable dataset access from GPU memory - AlgoProperty get_property() const override + AlgoProperty get_preference() const override { AlgoProperty property; property.dataset_memory_type = MemoryType::Host; @@ -161,7 +162,8 @@ void RaftIvfPQ::search(const T* queries, raft::runtime::neighbors::ivf_pq::search( handle_, search_params_, *index_, queries_v, candidates.view(), distances_tmp.view()); - if (get_property().dataset_memory_type == MemoryType::Device) { + if (raft::spatial::knn::detail::utils::check_pointer_residency(dataset_.data_handle()) == + raft::spatial::knn::detail::utils::pointer_residency::device_only) { auto queries_v = 
    raft::make_device_matrix_view(queries, batch_size, index_->dim());
  auto neighbors_v = raft::make_device_matrix_view((IdxT*)neighbors, batch_size, k);
diff --git a/cpp/bench/ann/src/raft/raft_wrapper.h b/cpp/bench/ann/src/raft/raft_wrapper.h
index 01f206ab70..c8d98460b7 100644
--- a/cpp/bench/ann/src/raft/raft_wrapper.h
+++ b/cpp/bench/ann/src/raft/raft_wrapper.h
@@ -65,12 +65,11 @@ class RaftGpu : public ANN {
              cudaStream_t stream = 0) const final;
 
   // to enable dataset access from GPU memory
-  AlgoProperty get_property() const override
+  AlgoProperty get_preference() const override
   {
     AlgoProperty property;
-    property.dataset_memory_type      = MemoryType::Device;
-    property.query_memory_type        = MemoryType::Device;
-    property.need_dataset_when_search = true;
+    property.dataset_memory_type = MemoryType::Device;
+    property.query_memory_type   = MemoryType::Device;
     return property;
   }
   void set_search_dataset(const T* dataset, size_t nrow) override;

From 30f7467bdf1ef9f94e910f77ed75ef92ff93ef13 Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet"
Date: Thu, 24 Aug 2023 17:03:05 -0400
Subject: [PATCH 11/70] Tuning guide

---
 docs/source/ann_benchmarks_param_tuning.md | 13 +++++++++++++
 docs/source/raft_ann_benchmarks.md         |  2 +-
 2 files changed, 14 insertions(+), 1 deletion(-)
 create mode 100644 docs/source/ann_benchmarks_param_tuning.md

diff --git a/docs/source/ann_benchmarks_param_tuning.md b/docs/source/ann_benchmarks_param_tuning.md
new file mode 100644
index 0000000000..1391f261cb
--- /dev/null
+++ b/docs/source/ann_benchmarks_param_tuning.md
@@ -0,0 +1,13 @@
+# ANN Benchmarks Parameter Tuning Guide
+
+This guide outlines the various parameter settings that can be specified in [RAFT ANN Benchmark](raft_ann_benchmarks.md) JSON configuration files and explains the impact they have on corresponding algorithms to help inform their settings for benchmarking across desired levels of recall.
+
+
+| Library | Algorithms                                   |
+|---------|----------------------------------------------|
+| FAISS   | `faiss_gpu_ivf_flat`, `faiss_gpu_ivf_pq`     |
+| GGNN    | `ggnn`                                       |
+| HNSWlib | `hnswlib`                                    |
+| RAFT    | `raft_cagra`, `raft_ivf_flat`, `raft_ivf_pq` |
+
+
diff --git a/docs/source/raft_ann_benchmarks.md b/docs/source/raft_ann_benchmarks.md
index b1c8813ead..f4af3c8787 100644
--- a/docs/source/raft_ann_benchmarks.md
+++ b/docs/source/raft_ann_benchmarks.md
@@ -254,7 +254,7 @@ The `index` section will contain a list of index objects, each of which will hav
 }
 ```
 
-The table below contains the possible settings for the `algo` field. Each unique algorithm will have its own set of `build_param` and `search_params` settings. The [ANN Algorithm Parameter Tuning Guide]() contains detailed instructions on choosing build and search parameters for each supported algorithm.
+The table below contains the possible settings for the `algo` field. Each unique algorithm will have its own set of `build_param` and `search_params` settings. The [ANN Algorithm Parameter Tuning Guide](ann_benchmarks_param_tuning.md) contains detailed instructions on choosing build and search parameters for each supported algorithm.
| Library | Algorithms | |-----------|----------------------------------------------| From f927f6927bbc1a1288617df5f00850e1d4c32e89 Mon Sep 17 00:00:00 2001 From: divyegala Date: Thu, 24 Aug 2023 14:18:45 -0700 Subject: [PATCH 12/70] compiling, index building successful, search failing --- bench/ann/algos.yaml | 3 +++ bench/ann/run.py | 22 ++++++++++++++-------- cpp/bench/ann/CMakeLists.txt | 19 +++++-------------- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/bench/ann/algos.yaml b/bench/ann/algos.yaml index 16a6c1a895..5f554fc46b 100644 --- a/bench/ann/algos.yaml +++ b/bench/ann/algos.yaml @@ -10,6 +10,9 @@ faiss_gpu_ivf_pq: faiss_gpu_ivf_sq: executable: FAISS_IVF_PQ_ANN_BENCH disabled: false +faiss_gpu_bfknn: + executable: FAISS_BFKNN_ANN_BENCH + disabled: false raft_ivf_flat: executable: RAFT_IVF_FLAT_ANN_BENCH disabled: false diff --git a/bench/ann/run.py b/bench/ann/run.py index dad1ad7a62..e64148abd8 100644 --- a/bench/ann/run.py +++ b/bench/ann/run.py @@ -75,19 +75,19 @@ def run_build_and_search(conf_file, conf_filename, conf_filedir, p.wait() if search: - # legacy_result_folder = "result/" + conf_file["dataset"]["name"] - # os.makedirs(legacy_result_folder, exist_ok=True) + legacy_result_folder = os.path.join(dataset_path, conf_file['dataset']['name'], 'result') + os.makedirs(legacy_result_folder, exist_ok=True) cmd = [ann_executable_path, "--search", "--data_prefix="+dataset_path, "--benchmark_counters_tabular", - "--benchmark_out_format=json", + "--benchmark_out_format=csv", "--override_kv=k:%s" % k, "--override_kv=n_queries:%s" % batch_size, - "--benchmark_out_format=csv", - f"--benchmark_out={os.path.join(dataset_path, 'result.csv')}"] + f"--benchmark_out={os.path.join(dataset_path, conf_file['dataset']['name'], 'result', f'{executable}.csv')}"] if force: cmd = cmd + ["--overwrite"] + cmd = cmd + [temp_conf_filepath] print(cmd) p = subprocess.Popen(cmd) p.wait() @@ -171,11 +171,11 @@ def main(): conf_filename = conf_filepath.split("/")[-1] conf_filedir = "/".join(conf_filepath.split("/")[:-1]) dataset_name = conf_filename.replace(".json", "") - dataset_path = os.path.realpath(os.path.join(args.dataset_path, dataset_name)) + dataset_path = args.dataset_path if not os.path.exists(conf_filepath): raise FileNotFoundError(conf_filename) - if not os.path.exists(dataset_path): - raise FileNotFoundError(dataset_path) + if not os.path.exists(os.path.join(args.dataset_path, dataset_name)): + raise FileNotFoundError(os.path.join(args.dataset_path, dataset_name)) with open(conf_filepath, "r") as f: conf_file = json.load(f) @@ -219,6 +219,12 @@ def main(): executables_to_run[executable_path] = {"index": []} executables_to_run[executable_path]["index"].append(index) + # Replace index to dataset path + for executable_path in executables_to_run: + for pos, index in enumerate(executables_to_run[executable_path]["index"]): + index["file"] = os.path.join(dataset_path, dataset_name, "index", index["name"]) + executables_to_run[executable_path]["index"][pos] = index + print(executables_to_run) run_build_and_search(conf_file, conf_filename, conf_filedir, diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index ed067de064..119a5c0a73 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -15,10 +15,9 @@ # ################################################################################################## # * compiler function ----------------------------------------------------------------------------- 
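+# Each RAFT_ANN_BENCH_USE_<ALGO> option below controls whether that algorithm's benchmark target is configured.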
+option(RAFT_ANN_BENCH_USE_FAISS_BFKNN "Include faiss' brute-force knn algorithm in benchmark" ON) option(RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT "Include faiss' ivf flat algorithm in benchmark" ON) option(RAFT_ANN_BENCH_USE_FAISS_IVF_PQ "Include faiss' ivf pq algorithm in benchmark" ON) -option(RAFT_ANN_BENCH_USE_FAISS_IVF_SQ "Include faiss' brute-force knn algorithm in benchmark" ON) -option(RAFT_ANN_BENCH_USE_FAISS_FLAT "Include faiss' brute-force knn algorithm in benchmark" ON) option(RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT "Include raft's ivf flat algorithm in benchmark" ON) option(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ "Include raft's ivf pq algorithm in benchmark" ON) option(RAFT_ANN_BENCH_USE_RAFT_CAGRA "Include raft's CAGRA in benchmark" ON) @@ -193,26 +192,18 @@ endif() if(RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT) ConfigureAnnBench( - NAME FAISS_GPU_IVF_FLAT PATH bench/ann/src/faiss/faiss_benchmark.cu LINKS faiss::faiss + NAME FAISS_IVF_FLAT PATH bench/ann/src/faiss/faiss_benchmark.cu LINKS faiss::faiss ) endif() if(RAFT_ANN_BENCH_USE_FAISS_IVF_PQ) ConfigureAnnBench( - NAME FAISS_GPU_IVF_PQ PATH bench/ann/src/faiss/faiss_benchmark.cu LINKS faiss::faiss + NAME FAISS_IVF_PQ PATH bench/ann/src/faiss/faiss_benchmark.cu LINKS faiss::faiss ) endif() -if(RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT) - ConfigureAnnBench( - NAME FAISS_GPU_IVF_SQ PATH bench/ann/src/faiss/faiss_benchmark.cu LINKS faiss::faiss - ) -endif() - -if(RAFT_ANN_BENCH_USE_FAISS_IVF_PQ) - ConfigureAnnBench( - NAME FAISS_GPU_FLAT PATH bench/ann/src/faiss/faiss_benchmark.cu LINKS faiss::faiss - ) +if(RAFT_ANN_BENCH_USE_FAISS_BFKNN) + ConfigureAnnBench(NAME FAISS_BFKNN PATH bench/ann/src/faiss/faiss_benchmark.cu LINKS faiss::faiss) endif() if(RAFT_ANN_BENCH_USE_GGNN) From 2f19c440591d31f42324c1a171e5f2971a4a747f Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Thu, 24 Aug 2023 22:36:27 -0500 Subject: [PATCH 13/70] FEA first commit rebasing changes on gbench branch --- build.sh | 18 +- ci/build_python.sh | 19 +- .../all_cuda-118_arch-x86_64.yaml | 6 +- .../all_cuda-120_arch-x86_64.yaml | 6 +- conda/recipes/libraft/meta.yaml | 59 - .../build_raft_nn_bench_cpu.sh | 5 + conda/recipes/raft-ann-bench-cpu/meta.yaml | 65 + .../build.sh} | 0 .../raft-ann-bench/conda_build_config.yaml | 73 + conda/recipes/raft-ann-bench/meta.yaml | 96 ++ conda/recipes/raft-dask/meta.yaml | 6 +- cpp/CMakeLists.txt | 56 +- cpp/bench/ann/CMakeLists.txt | 47 +- dependencies.yaml | 28 +- python/raft-ann-bench/LICENSE | 1 + python/raft-ann-bench/pyproject.toml | 57 + .../raft-ann-bench/raft-ann-bench/__init__.py | 0 .../raft-ann-bench/data_export/__main__.py | 80 + .../raft-ann-bench/data_export/eval.pl | 430 ++++++ .../raft-ann-bench/get_dataset/__main__.py | 93 ++ .../get_dataset}/fbin_to_f16bin.py | 21 +- .../get_dataset}/hdf5_to_fbin.py | 21 +- .../raft-ann-bench/plot/__main__.py | 240 +++ .../raft-ann-bench/run/__main__.py | 234 +++ .../raft-ann-bench/run/algos-cpu.yaml | 30 + .../raft-ann-bench/run/algos.yaml | 30 + .../raft-ann-bench/run/conf/bigann-100M.json | 202 +++ .../raft-ann-bench/run/conf/deep-100M.json | 849 +++++++++++ .../raft-ann-bench/run/conf/deep-1B.json | 38 + .../run/conf/glove-100-inner.json | 793 ++++++++++ .../run/conf/sift-128-euclidean.json | 1351 +++++++++++++++++ .../split_groundtruth/__main__.py | 47 + .../split_groundtruth}/split_groundtruth.pl | 0 python/raft-dask/pyproject.toml | 4 +- 34 files changed, 4885 insertions(+), 120 deletions(-) create mode 100644 conda/recipes/raft-ann-bench-cpu/build_raft_nn_bench_cpu.sh create mode 100644 
conda/recipes/raft-ann-bench-cpu/meta.yaml rename conda/recipes/{libraft/build_libraft_nn_bench.sh => raft-ann-bench/build.sh} (100%) create mode 100644 conda/recipes/raft-ann-bench/conda_build_config.yaml create mode 100644 conda/recipes/raft-ann-bench/meta.yaml create mode 120000 python/raft-ann-bench/LICENSE create mode 100644 python/raft-ann-bench/pyproject.toml create mode 100644 python/raft-ann-bench/raft-ann-bench/__init__.py create mode 100644 python/raft-ann-bench/raft-ann-bench/data_export/__main__.py create mode 100755 python/raft-ann-bench/raft-ann-bench/data_export/eval.pl create mode 100644 python/raft-ann-bench/raft-ann-bench/get_dataset/__main__.py rename {cpp/bench/ann/scripts => python/raft-ann-bench/raft-ann-bench/get_dataset}/fbin_to_f16bin.py (64%) rename {cpp/bench/ann/scripts => python/raft-ann-bench/raft-ann-bench/get_dataset}/hdf5_to_fbin.py (81%) create mode 100644 python/raft-ann-bench/raft-ann-bench/plot/__main__.py create mode 100644 python/raft-ann-bench/raft-ann-bench/run/__main__.py create mode 100644 python/raft-ann-bench/raft-ann-bench/run/algos-cpu.yaml create mode 100644 python/raft-ann-bench/raft-ann-bench/run/algos.yaml create mode 100644 python/raft-ann-bench/raft-ann-bench/run/conf/bigann-100M.json create mode 100644 python/raft-ann-bench/raft-ann-bench/run/conf/deep-100M.json create mode 100644 python/raft-ann-bench/raft-ann-bench/run/conf/deep-1B.json create mode 100644 python/raft-ann-bench/raft-ann-bench/run/conf/glove-100-inner.json create mode 100644 python/raft-ann-bench/raft-ann-bench/run/conf/sift-128-euclidean.json create mode 100644 python/raft-ann-bench/raft-ann-bench/split_groundtruth/__main__.py rename {cpp/bench/ann/scripts => python/raft-ann-bench/raft-ann-bench/split_groundtruth}/split_groundtruth.pl (100%) diff --git a/build.sh b/build.sh index 8706f1b138..628332b6e5 100755 --- a/build.sh +++ b/build.sh @@ -39,6 +39,7 @@ HELP="$0 [ ...] [ ...] 
[--cmake-args=\"\"] [--cache-tool==12.0.0 - cxx-compiler - cython>=3.0.0 -- dask-core>=2023.7.1 +- dask-core==2023.7.1 - dask-cuda==23.10.* -- dask>=2023.7.1 -- distributed>=2023.7.1 +- dask==2023.7.1 +- distributed==2023.7.1 - doxygen>=1.8.20 - gcc_linux-64=11.* - gmock>=1.13.0 diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml index 2ea685b529..e68feaad82 100644 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ b/conda/environments/all_cuda-120_arch-x86_64.yaml @@ -19,10 +19,10 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- dask-core>=2023.7.1 +- dask-core==2023.7.1 - dask-cuda==23.10.* -- dask>=2023.7.1 -- distributed>=2023.7.1 +- dask==2023.7.1 +- distributed==2023.7.1 - doxygen>=1.8.20 - gcc_linux-64=11.* - gmock>=1.13.0 diff --git a/conda/recipes/libraft/meta.yaml b/conda/recipes/libraft/meta.yaml index 09ef7ae4ab..96bc6cc5dd 100644 --- a/conda/recipes/libraft/meta.yaml +++ b/conda/recipes/libraft/meta.yaml @@ -269,62 +269,3 @@ outputs: home: https://rapids.ai/ license: Apache-2.0 summary: libraft template - - name: libraft-ann-bench - version: {{ version }} - script: build_libraft_nn_bench.sh - build: - script_env: *script_env - number: {{ GIT_DESCRIBE_NUMBER }} - string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} - ignore_run_exports_from: - {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} - {% endif %} - requirements: - build: - - {{ compiler('c') }} - - {{ compiler('cxx') }} - {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} ={{ cuda_version }} - {% else %} - - {{ compiler('cuda') }} - {% endif %} - - cuda-version ={{ cuda_version }} - - cmake {{ cmake_version }} - - ninja - - sysroot_{{ target_platform }} {{ sysroot_version }} - host: - - {{ pin_subpackage('libraft', exact=True) }} - - cuda-version ={{ cuda_version }} - {% if cuda_major == "11" %} - - cuda-profiler-api {{ cuda11_cuda_profiler_api_run_version }} - - libcublas {{ cuda11_libcublas_host_version }} - - libcublas-dev {{ cuda11_libcublas_host_version }} - {% else %} - - cuda-profiler-api - - libcublas-dev - {% endif %} - - glog {{ glog_version }} - - nlohmann_json {{ nlohmann_json_version }} - # Temporarily ignore faiss benchmarks on CUDA 12 because packages do not exist yet - {% if cuda_major == "11" %} - - faiss-proc=*=cuda - - libfaiss {{ faiss_version }} - {% endif %} - run: - - {{ pin_subpackage('libraft', exact=True) }} - - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} - {% if cuda_major == "11" %} - - cudatoolkit - {% endif %} - - glog {{ glog_version }} - # Temporarily ignore faiss benchmarks on CUDA 12 because packages do not exist yet - {% if cuda_major == "11" %} - - faiss-proc=*=cuda - - libfaiss {{ faiss_version }} - {% endif %} - - h5py {{ h5py_version }} - about: - home: https://rapids.ai/ - license: Apache-2.0 - summary: libraft ann bench diff --git a/conda/recipes/raft-ann-bench-cpu/build_raft_nn_bench_cpu.sh b/conda/recipes/raft-ann-bench-cpu/build_raft_nn_bench_cpu.sh new file mode 100644 index 0000000000..b796b4d7d5 --- /dev/null +++ b/conda/recipes/raft-ann-bench-cpu/build_raft_nn_bench_cpu.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash +# Copyright (c) 2023, NVIDIA CORPORATION. 
+ +./build.sh bench-ann --cpu-only --no-nvtx --build-metrics=bench_ann --incl-cache-stats +cmake --install cpp/build --component ann_bench diff --git a/conda/recipes/raft-ann-bench-cpu/meta.yaml b/conda/recipes/raft-ann-bench-cpu/meta.yaml new file mode 100644 index 0000000000..c0450b9e8a --- /dev/null +++ b/conda/recipes/raft-ann-bench-cpu/meta.yaml @@ -0,0 +1,65 @@ +# Copyright (c) 2022-2023, NVIDIA CORPORATION. + +# Usage: +# conda build . -c conda-forge -c nvidia -c rapidsai +{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %} +{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} +{% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %} +{% set cuda_major = cuda_version.split('.')[0] %} +{% set cuda_spec = ">=" + cuda_major ~ ",<" + (cuda_major | int + 1) ~ ".0a0" %} # i.e. >=11,<12.0a0 +{% set date_string = environ['RAPIDS_DATE_STRING'] %} + +package: + name: raft-ann-bench-cpu + version: {{ version }} + script: build.sh + +source: + git_url: ../../.. + +build: + script_env: + - AWS_ACCESS_KEY_ID + - AWS_SECRET_ACCESS_KEY + - AWS_SESSION_TOKEN + - CMAKE_C_COMPILER_LAUNCHER + - CMAKE_CUDA_COMPILER_LAUNCHER + - CMAKE_CXX_COMPILER_LAUNCHER + - CMAKE_GENERATOR + - PARALLEL_LEVEL + - RAPIDS_ARTIFACTS_DIR + - SCCACHE_BUCKET + - SCCACHE_IDLE_TIMEOUT + - SCCACHE_REGION + - SCCACHE_S3_KEY_PREFIX=libraft-aarch64 # [aarch64] + - SCCACHE_S3_KEY_PREFIX=libraft-linux64 # [linux64] + - SCCACHE_S3_USE_SSL + number: {{ GIT_DESCRIBE_NUMBER }} + string: py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} + +requirements: + build: + - {{ compiler('c') }} + - {{ compiler('cxx') }} + - cmake {{ cmake_version }} + - ninja + - sysroot_{{ target_platform }} {{ sysroot_version }} + + host: + - glog {{ glog_version }} + - matplotlib + - nlohmann_json {{ nlohmann_json_version }} + - python + - pyyaml + + run: + - glog {{ glog_version }} + - h5py {{ h5py_version }} + - matplotlib + - python + - pyyaml + +about: + home: https://rapids.ai/ + license: Apache-2.0 + summary: libraft ann bench diff --git a/conda/recipes/libraft/build_libraft_nn_bench.sh b/conda/recipes/raft-ann-bench/build.sh similarity index 100% rename from conda/recipes/libraft/build_libraft_nn_bench.sh rename to conda/recipes/raft-ann-bench/build.sh diff --git a/conda/recipes/raft-ann-bench/conda_build_config.yaml b/conda/recipes/raft-ann-bench/conda_build_config.yaml new file mode 100644 index 0000000000..d156f2609b --- /dev/null +++ b/conda/recipes/raft-ann-bench/conda_build_config.yaml @@ -0,0 +1,73 @@ +c_compiler_version: + - 11 + +cxx_compiler_version: + - 11 + +cuda_compiler: + - cuda-nvcc + +cuda11_compiler: + - nvcc + +sysroot_version: + - "2.17" + +cmake_version: + - ">=3.26.4" + +nccl_version: + - ">=2.9.9" + +gtest_version: + - ">=1.13.0" + +glog_version: + - ">=0.6.0" + +faiss_version: + - ">=1.7.1" + +h5py_version: + - ">=3.8.0" + +nlohmann_json_version: + - ">=3.11.2" + +# The CTK libraries below are missing from the conda-forge::cudatoolkit package +# for CUDA 11. The "*_host_*" version specifiers correspond to `11.8` packages +# and the "*_run_*" version specifiers correspond to `11.x` packages. 
+ +cuda11_libcublas_host_version: + - "=11.11.3.6" + +cuda11_libcublas_run_version: + - ">=11.5.2.43,<12.0.0" + +cuda11_libcurand_host_version: + - "=10.3.0.86" + +cuda11_libcurand_run_version: + - ">=10.2.5.43,<10.3.1" + +cuda11_libcusolver_host_version: + - "=11.4.1.48" + +cuda11_libcusolver_run_version: + - ">=11.2.0.43,<11.4.2" + +cuda11_libcusparse_host_version: + - "=11.7.5.86" + +cuda11_libcusparse_run_version: + - ">=11.6.0.43,<12.0.0" + +# `cuda-profiler-api` only has `11.8.0` and `12.0.0` packages for all +# architectures. The "*_host_*" version specifiers correspond to `11.8` packages and the +# "*_run_*" version specifiers correspond to `11.x` packages. + +cuda11_cuda_profiler_api_host_version: + - "=11.8.86" + +cuda11_cuda_profiler_api_run_version: + - ">=11.4.240,<12" diff --git a/conda/recipes/raft-ann-bench/meta.yaml b/conda/recipes/raft-ann-bench/meta.yaml new file mode 100644 index 0000000000..6e5580dad2 --- /dev/null +++ b/conda/recipes/raft-ann-bench/meta.yaml @@ -0,0 +1,96 @@ +# Copyright (c) 2022-2023, NVIDIA CORPORATION. + +# Usage: +# conda build . -c conda-forge -c nvidia -c rapidsai +{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %} +{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} +{% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %} +{% set cuda_major = cuda_version.split('.')[0] %} +{% set cuda_spec = ">=" + cuda_major ~ ",<" + (cuda_major | int + 1) ~ ".0a0" %} # i.e. >=11,<12.0a0 +{% set date_string = environ['RAPIDS_DATE_STRING'] %} + +package: + name: raft-ann-bench + version: {{ version }} + script: build.sh + +source: + git_url: ../../.. + +build: + script_env: + - AWS_ACCESS_KEY_ID + - AWS_SECRET_ACCESS_KEY + - AWS_SESSION_TOKEN + - CMAKE_C_COMPILER_LAUNCHER + - CMAKE_CUDA_COMPILER_LAUNCHER + - CMAKE_CXX_COMPILER_LAUNCHER + - CMAKE_GENERATOR + - PARALLEL_LEVEL + - RAPIDS_ARTIFACTS_DIR + - SCCACHE_BUCKET + - SCCACHE_IDLE_TIMEOUT + - SCCACHE_REGION + - SCCACHE_S3_KEY_PREFIX=libraft-aarch64 # [aarch64] + - SCCACHE_S3_KEY_PREFIX=libraft-linux64 # [linux64] + - SCCACHE_S3_USE_SSL + number: {{ GIT_DESCRIBE_NUMBER }} + string: cuda{{ cuda_major }}_py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} + ignore_run_exports_from: + {% if cuda_major == "11" %} + - {{ compiler('cuda11') }} + {% endif %} + +requirements: + build: + - {{ compiler('c') }} + - {{ compiler('cxx') }} + {% if cuda_major == "11" %} + - {{ compiler('cuda11') }} ={{ cuda_version }} + {% else %} + - {{ compiler('cuda') }} + {% endif %} + - cuda-version ={{ cuda_version }} + - cmake {{ cmake_version }} + - ninja + - sysroot_{{ target_platform }} {{ sysroot_version }} + + host: + - python + - libraft {{ version }} + - cuda-version ={{ cuda_version }} + {% if cuda_major == "11" %} + - cuda-profiler-api {{ cuda11_cuda_profiler_api_run_version }} + - libcublas {{ cuda11_libcublas_host_version }} + - libcublas-dev {{ cuda11_libcublas_host_version }} + {% else %} + - cuda-profiler-api + - libcublas-dev + {% endif %} + - glog {{ glog_version }} + - nlohmann_json {{ nlohmann_json_version }} + # Temporarily ignore faiss benchmarks on CUDA 12 because packages do not exist yet + {% if cuda_major == "11" %} + - faiss-proc=*=cuda + - libfaiss {{ faiss_version }} + {% endif %} + + run: + - python + - libraft {{ version }} + - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} + {% if cuda_major == "11" %} + - cudatoolkit + {% endif %} + - glog {{ glog_version }} + # Temporarily 
ignore faiss benchmarks on CUDA 12 because packages do not exist yet
+    {% if cuda_major == "11" %}
+    - faiss-proc=*=cuda
+    - libfaiss {{ faiss_version }}
+    {% endif %}
+    - h5py {{ h5py_version }}
+
+about:
+  home: https://rapids.ai/
+  license: Apache-2.0
+  summary: RAFT ANN GPU and CPU benchmarks
diff --git a/conda/recipes/raft-dask/meta.yaml b/conda/recipes/raft-dask/meta.yaml
index c9caa4dd9b..cf1f8488bc 100644
--- a/conda/recipes/raft-dask/meta.yaml
+++ b/conda/recipes/raft-dask/meta.yaml
@@ -60,10 +60,10 @@ requirements:
       - cudatoolkit
     {% endif %}
     - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
-    - dask >=2023.7.1
-    - dask-core >=2023.7.1
+    - dask ==2023.7.1
+    - dask-core ==2023.7.1
     - dask-cuda ={{ minor_version }}
-    - distributed >=2023.7.1
+    - distributed ==2023.7.1
     - joblib >=0.11
     - nccl >=2.9.9
     - pylibraft {{ version }}
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index eb92d4e7b5..32b99fec4d 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -17,18 +17,26 @@
 include(../fetch_rapids.cmake)
 include(rapids-cmake)
 include(rapids-cpm)
-include(rapids-cuda)
 include(rapids-export)
 include(rapids-find)

-rapids_cuda_init_architectures(RAFT)
+option(CPU_ONLY "Build CPU only components. Applies to RAFT ANN benchmarks currently" OFF)
+
+if(NOT CPU_ONLY)
+  include(rapids-cuda)
+  rapids_cuda_init_architectures(RAFT)
+endif()

 project(
   RAFT
   VERSION ${RAFT_VERSION}
-  LANGUAGES CXX CUDA
+  LANGUAGES CXX
 )

+if(NOT CPU_ONLY)
+  enable_language(CUDA)
+endif()
+
 # Write the version header
 rapids_cmake_write_version_file(include/raft/version_config.hpp)

@@ -60,9 +68,10 @@ option(DISABLE_OPENMP "Disable OpenMP" OFF)
 option(RAFT_NVTX "Enable nvtx markers" OFF)

 set(RAFT_COMPILE_LIBRARY_DEFAULT OFF)
-if(BUILD_TESTS
-   OR BUILD_PRIMS_BENCH
-   OR BUILD_ANN_BENCH
+if((BUILD_TESTS
+    OR BUILD_PRIMS_BENCH
+    OR BUILD_ANN_BENCH)
+   AND NOT CPU_ONLY
 )
   set(RAFT_COMPILE_LIBRARY_DEFAULT ON)
 endif()
@@ -70,6 +79,11 @@ option(RAFT_COMPILE_LIBRARY "Enable building raft shared library instantiations"
        ${RAFT_COMPILE_LIBRARY_DEFAULT}
 )

+if(CPU_ONLY)
+  set(BUILD_SHARED_LIBS OFF)
+  set(BUILD_TESTS OFF)
+endif()
+
 # Needed because GoogleBenchmark changes the state of FindThreads.cmake, causing subsequent runs to
 # have different values for the `Threads::Threads` target.
Setting this flag ensures # `Threads::Threads` is the same value across all builds so that cache hits occur @@ -82,6 +96,8 @@ include(CMakeDependentOption) message(VERBOSE "RAFT: Building optional components: ${raft_FIND_COMPONENTS}") message(VERBOSE "RAFT: Build RAFT unit-tests: ${BUILD_TESTS}") message(VERBOSE "RAFT: Building raft C++ benchmarks: ${BUILD_PRIMS_BENCH}") +message(VERBOSE "RAFT: Building ANN benchmarks: ${BUILD_ANN_BENCH}") +message(VERBOSE "RAFT: Build CPU only components: ${CPU_ONLY}") message(VERBOSE "RAFT: Enable detection of conda environment for dependencies: ${DETECT_CONDA_ENV}") message(VERBOSE "RAFT: Disable depreaction warnings " ${DISABLE_DEPRECATION_WARNINGS}) message(VERBOSE "RAFT: Disable OpenMP: ${DISABLE_OPENMP}") @@ -116,7 +132,7 @@ if(DETECT_CONDA_ENV) endif() # ################################################################################################## -# * compiler options --------------------------------------------------------- +# * compiler options ---------------------------------------------------------- set(_ctk_static_suffix "") if(CUDA_STATIC_RUNTIME) @@ -127,11 +143,7 @@ endif() rapids_cuda_init_runtime(USE_STATIC ${CUDA_STATIC_RUNTIME}) if(NOT DISABLE_OPENMP) - rapids_find_package( - OpenMP REQUIRED - BUILD_EXPORT_SET raft-exports - INSTALL_EXPORT_SET raft-exports - ) + find_package(OpenMP) if(OPENMP_FOUND) message(VERBOSE "RAFT: OpenMP found in ${OpenMP_CXX_INCLUDE_DIRS}") endif() @@ -154,13 +166,15 @@ include(cmake/modules/ConfigureCUDA.cmake) # add third party dependencies using CPM rapids_cpm_init() -# thrust before rmm/cuco so we get the right version of thrust/cub -include(cmake/thirdparty/get_thrust.cmake) -include(cmake/thirdparty/get_rmm.cmake) -include(cmake/thirdparty/get_cutlass.cmake) +if(NOT CPU_ONLY) + # thrust before rmm/cuco so we get the right version of thrust/cub + include(cmake/thirdparty/get_thrust.cmake) + include(cmake/thirdparty/get_rmm.cmake) + include(cmake/thirdparty/get_cutlass.cmake) -include(${rapids-cmake-dir}/cpm/cuco.cmake) -rapids_cpm_cuco(BUILD_EXPORT_SET raft-exports INSTALL_EXPORT_SET raft-exports) + include(${rapids-cmake-dir}/cpm/cuco.cmake) + rapids_cpm_cuco(BUILD_EXPORT_SET raft-exports INSTALL_EXPORT_SET raft-exports) +endif() if(BUILD_TESTS) include(cmake/thirdparty/get_gtest.cmake) @@ -180,8 +194,10 @@ target_include_directories( raft INTERFACE "$" "$" ) -# Keep RAFT as lightweight as possible. Only CUDA libs and rmm should be used in global target. -target_link_libraries(raft INTERFACE rmm::rmm cuco::cuco nvidia::cutlass::cutlass raft::Thrust) +if(NOT CPU_ONLY) + # Keep RAFT as lightweight as possible. Only CUDA libs and rmm should be used in global target. 
+ target_link_libraries(raft INTERFACE rmm::rmm cuco::cuco nvidia::cutlass::cutlass raft::Thrust) +endif() target_compile_features(raft INTERFACE cxx_std_17 $) target_compile_options( diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index 119a5c0a73..2df845636e 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -13,7 +13,7 @@ # ============================================================================= # ################################################################################################## -# * compiler function ----------------------------------------------------------------------------- +# * benchmark options- ----------------------------------------------------------------------------- option(RAFT_ANN_BENCH_USE_FAISS_BFKNN "Include faiss' brute-force knn algorithm in benchmark" ON) option(RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT "Include faiss' ivf flat algorithm in benchmark" ON) @@ -27,8 +27,21 @@ option(RAFT_ANN_BENCH_SINGLE_EXE "Make a single executable with benchmark as shared library modules" OFF ) +# ################################################################################################## +# * Process options ---------------------------------------------------------- + find_package(Threads REQUIRED) +if(CPU_ONLY) + set(RAFT_ANN_BENCH_USE_FAISS_BFKNN OFF) + set(RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT OFF) + set(RAFT_ANN_BENCH_USE_FAISS_IVF_PQ OFF) + set(RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT OFF) + set(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ OFF) + set(RAFT_ANN_BENCH_USE_RAFT_CAGRA OFF) + set(RAFT_ANN_BENCH_USE_GGNN OFF) +endif() + # Disable faiss benchmarks on CUDA 12 since faiss is not yet CUDA 12-enabled. # https://github.com/rapidsai/raft/issues/1627 if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.0.0) @@ -53,12 +66,25 @@ if(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ set(RAFT_ANN_BENCH_USE_RAFT ON) endif() +option(RAFT_ANN_BENCH_USE_MULTIGPU "Use multi-gpus (where possible) in benchmarks" OFF) + +message(VERBOSE "RAFT: Build ann-bench with FAISS_BFKNN: ${RAFT_ANN_BENCH_USE_FAISS_BFKNN}") +message(VERBOSE "RAFT: Build ann-bench with FAISS_IVF_FLAT: ${RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT}") +message(VERBOSE "RAFT: Build ann-bench with FAISS_IVF_PQ: ${RAFT_ANN_BENCH_USE_FAISS_IVF_PQ}") +message(VERBOSE "RAFT: Build ann-bench with RAFT_IVF_FLAT: ${RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT}") +message(VERBOSE "RAFT: Build ann-bench with RAFT_IVF_PQ: ${RAFT_ANN_BENCH_USE_RAFT_IVF_PQ}") +message(VERBOSE "RAFT: Build ann-bench with RAFT_CAGRA: ${RAFT_ANN_BENCH_USE_RAFT_CAGRA}") +message(VERBOSE "RAFT: Build ann-bench with HNSWLIB: ${RAFT_ANN_BENCH_USE_HNSWLIB}") +message(VERBOSE "RAFT: Build ann-bench with GGNN: ${RAFT_ANN_BENCH_USE_GGNN}") +message(VERBOSE "RAFT: Build ann-bench with MULTIGPU: ${RAFT_ANN_BENCH_USE_MULTIGPU}") + +# ################################################################################################## +# * Fetch requirements ------------------------------------------------------------- + if(RAFT_ANN_BENCH_USE_HNSWLIB) include(cmake/thirdparty/get_hnswlib.cmake) endif() -option(RAFT_ANN_BENCH_USE_MULTIGPU "Use multi-gpus (where possible) in benchmarks" OFF) - include(cmake/thirdparty/get_nlohmann_json.cmake) if(RAFT_ANN_BENCH_USE_GGNN) @@ -69,11 +95,18 @@ if(RAFT_ANN_BENCH_USE_FAISS) include(cmake/thirdparty/get_faiss.cmake) endif() +# ################################################################################################## +# * Configure tests 
function------------------------------------------------------------- + function(ConfigureAnnBench) set(oneValueArgs NAME) set(multiValueArgs PATH LINKS CXXFLAGS INCLUDES) + if(NOT CPU_ONLY) + set(GPU_BUILD ON) + endif() + cmake_parse_arguments( ConfigureAnnBench "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} ) @@ -95,10 +128,10 @@ function(ConfigureAnnBench) ${BENCH_NAME} PRIVATE raft::raft nlohmann_json::nlohmann_json - $<$:NCCL::NCCL> + $<$:$<$:NCCL::NCCL>> ${ConfigureAnnBench_LINKS} Threads::Threads - ${RAFT_CTK_MATH_DEPENDENCIES} + $<$:${RAFT_CTK_MATH_DEPENDENCIES}> $ $ -static-libgcc @@ -147,6 +180,9 @@ function(ConfigureAnnBench) ) endfunction() +# ################################################################################################## +# * Configure tests------------------------------------------------------------- + if(RAFT_ANN_BENCH_USE_HNSWLIB) ConfigureAnnBench( NAME HNSWLIB PATH bench/ann/src/hnswlib/hnswlib_benchmark.cpp INCLUDES @@ -216,6 +252,7 @@ endif() # ################################################################################################## # * Dynamically-loading ANN_BENCH executable ------------------------------------------------------- + if(RAFT_ANN_BENCH_SINGLE_EXE) add_executable(ANN_BENCH bench/ann/src/common/benchmark.cpp) diff --git a/dependencies.yaml b/dependencies.yaml index cf8170b9a1..d90a6e6e64 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -98,6 +98,20 @@ files: key: test includes: - test_python_common + py_build_raft_ann_bench: + output: pyproject + pyproject_dir: python/raft-ann-bench + extras: + table: build-system + includes: + - build_wheels + py_run_raft_ann_bench: + output: pyproject + pyproject_dir: python/raft-ann-bench + extras: + table: project + includes: + - nn_bench_python channels: - rapidsai - rapidsai-nightly @@ -161,7 +175,7 @@ dependencies: - clang-tools=16.0.1 nn_bench: common: - - output_types: [conda] + - output_types: [conda, pyproject, requirements] packages: - hnswlib=0.7.0 - nlohmann_json>=3.11.2 @@ -171,6 +185,12 @@ dependencies: - faiss-proc=*=cuda - matplotlib - pyyaml + nn_bench_python: + common: + - output_types: [conda] + packages: + - matplotlib + - pyyaml cudatoolkit: specific: @@ -305,16 +325,16 @@ dependencies: common: - output_types: [conda, pyproject] packages: - - dask>=2023.7.1 + - dask==2023.7.1 - dask-cuda==23.10.* - - distributed>=2023.7.1 + - distributed==2023.7.1 - joblib>=0.11 - numba>=0.57 - *numpy - ucx-py==0.34.* - output_types: conda packages: - - dask-core>=2023.7.1 + - dask-core==2023.7.1 - ucx>=1.13.0 - ucx-proc=*=gpu - output_types: pyproject diff --git a/python/raft-ann-bench/LICENSE b/python/raft-ann-bench/LICENSE new file mode 120000 index 0000000000..30cff7403d --- /dev/null +++ b/python/raft-ann-bench/LICENSE @@ -0,0 +1 @@ +../../LICENSE \ No newline at end of file diff --git a/python/raft-ann-bench/pyproject.toml b/python/raft-ann-bench/pyproject.toml new file mode 100644 index 0000000000..4c8cc94288 --- /dev/null +++ b/python/raft-ann-bench/pyproject.toml @@ -0,0 +1,57 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. + +[build-system] +build-backend = "setuptools.build_meta" +requires = [ + "setuptools", + "wheel", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
+ +[project] +name = "raft-ann-bench" +version = "23.10.00" +description = "RAFT ANN benchmarks" +authors = [ + { name = "NVIDIA Corporation" }, +] +license = { text = "Apache 2.0" } +requires-python = ">=3.9" +dependencies = [ +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. +classifiers = [ + "Intended Audience :: Developers", + "Topic :: Database", + "Topic :: Scientific/Engineering", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", +] +dynamic = ["entry-points"] + +[project.urls] +Homepage = "https://github.com/rapidsai/raft" + +[tool.setuptools] +license-files = ["LICENSE"] + +[tool.isort] +line_length = 79 +multi_line_output = 3 +include_trailing_comma = true +force_grid_wrap = 0 +combine_as_imports = true +order_by_type = true +skip = [ + "thirdparty", + ".eggs", + ".git", + ".hg", + ".mypy_cache", + ".tox", + ".venv", + "_build", + "buck-out", + "build", + "dist", +] diff --git a/python/raft-ann-bench/raft-ann-bench/__init__.py b/python/raft-ann-bench/raft-ann-bench/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python/raft-ann-bench/raft-ann-bench/data_export/__main__.py b/python/raft-ann-bench/raft-ann-bench/data_export/__main__.py new file mode 100644 index 0000000000..87ca330ed9 --- /dev/null +++ b/python/raft-ann-bench/raft-ann-bench/data_export/__main__.py @@ -0,0 +1,80 @@ +# +# Copyright (c) 2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
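For orientation before the script body: the exporter below does nothing more than flatten a Google Benchmark JSON report into a three-column CSV. A minimal sketch of that transformation, assuming the report layout produced by the gbench-based ANN_BENCH executables (a top-level "benchmarks" list whose entries carry a "Recall" counter and "items_per_second"); the result path used here is purely illustrative:

    import json

    # Hypothetical gbench output file; real paths come from the command line.
    with open("result/glove-100-inner/HNSWLIB_ANN_BENCH.json") as f:
        report = json.load(f)

    rows = ["Algo,Recall,QPS"]
    for case in report["benchmarks"]:
        rows.append(f'{case["name"]},{case["Recall"]},{case["items_per_second"]}')
    print("\n".join(rows))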
+
+import argparse
+import os
+import subprocess
+import json
+
+from pathlib import Path
+
+def parse_filepaths(fs):
+    for p in fs:
+        if p.endswith(".json") and os.path.exists(p):
+            yield p
+        else:
+            for f in Path(p).rglob('*.json'):
+                yield f.as_posix()
+
+def export_results(output_filepath, recompute, groundtruth_filepath,
+                   result_filepaths):
+    print(f"Writing output file to: {output_filepath}")
+
+    parsed_filepaths = parse_filepaths(result_filepaths)
+
+    with open(output_filepath, 'w') as out:
+        out.write("Algo,Recall,QPS\n")
+
+        for fp in parsed_filepaths:
+            with open(fp, 'r') as f:
+                data = json.load(f)
+            for benchmark_case in data["benchmarks"]:
+                algo = benchmark_case["name"]
+                recall = benchmark_case["Recall"]
+                qps = benchmark_case["items_per_second"]
+                out.write(f"{algo},{recall},{qps}\n")
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument("--output", help="Path to the CSV output file",
+                        required=True)
+    parser.add_argument("--recompute", action="store_true",
+                        help="Recompute metrics")
+    parser.add_argument("--dataset",
+                        help="Name of the dataset to export results for",
+                        default="glove-100-inner")
+    parser.add_argument(
+        "--dataset-path",
+        help="path to dataset folder",
+        default=os.path.join(os.getenv("RAFT_HOME"),
+                             "bench", "ann", "data")
+    )
+
+    args, result_filepaths = parser.parse_known_args()
+
+    # if nothing is provided
+    if len(result_filepaths) == 0:
+        raise ValueError("No filepaths to results were provided")
+
+    groundtruth_filepath = os.path.join(args.dataset_path, args.dataset,
+                                        "groundtruth.neighbors.ibin")
+    export_results(args.output, args.recompute, groundtruth_filepath,
+                   result_filepaths)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/python/raft-ann-bench/raft-ann-bench/data_export/eval.pl b/python/raft-ann-bench/raft-ann-bench/data_export/eval.pl
new file mode 100755
index 0000000000..81c5563d79
--- /dev/null
+++ b/python/raft-ann-bench/raft-ann-bench/data_export/eval.pl
@@ -0,0 +1,430 @@
+#!/usr/bin/perl
+
+# =============================================================================
+# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+
+use warnings;
+use strict;
+use autodie qw(open close);
+use File::Find;
+use Getopt::Std;
+
+my $QPS = 'QPS';
+my $AVG_LATENCY = 'avg_latency(ms)';
+my $P99_LATENCY = 'p99_latency(ms)';
+my $P999_LATENCY = 'p999_latency(ms)';
+my @CONDITIONS = ([$QPS, 2000], ['recall', 0.9], ['recall', 0.95]);
+
+
+my $USAGE = << 'END';
+usage: [-f] [-l avg|p99|p999] [-o output.csv] groundtruth.neighbors.ibin result_paths...
+  result_paths... are paths to the search result files.
+    Can specify multiple paths.
+    For each of them, if it's a directory, all the .txt files found under
+    it recursively will be regarded as inputs.
+
+  -f: force to recompute recall and update it in result file if needed
+  -l: output search latency rather than QPS.
Available options: + "avg" for average latency; + "p99" for 99th percentile latency; + "p999" for 99.9th percentile latency. + -o: also write result to a csv file +END + + +my %opt; +getopts('fl:o:', \%opt) + or die $USAGE; +my $force_calc_recall = exists $opt{f} ? 1 : 0; +my $csv_file; +$csv_file = $opt{o} if exists $opt{o}; +my $metric = $QPS; +if (exists $opt{l}) { + my $option = $opt{l}; + if ($option eq 'avg') { + $metric = $AVG_LATENCY; + } + elsif ($option eq 'p99') { + $metric = $P99_LATENCY; + } + elsif ($option eq 'p999') { + $metric = $P999_LATENCY; + } + else { + die + "[error] illegal value for '-l': '$option'. Must be 'avg', 'p99' or 'p999'\n"; + } +} + +@ARGV >= 2 + or die $USAGE; + + +my $truth_file = shift @ARGV; +my ($k, $dataset, $distance, $results) = get_all_results($metric, @ARGV); +if (!defined $k) { + print STDERR "no result file found\n"; + exit -1; +} +print STDERR "dataset = $dataset, distance = $distance, k = $k\n\n"; +calc_missing_recall($results, $truth_file, $force_calc_recall); + +my @results = sort { + $a->{name} cmp $b->{name} + or $a->{recall} <=> $b->{recall} + or $b->{qps} <=> $a->{qps} +} @$results; +printf("%-60s %6s %16s %s\n", '', 'Recall', $metric, 'search_param'); +for my $result (@results) { + my $fmt = ($metric eq $QPS) ? '%16.1f' : '%16.3f'; + my $qps = $result->{qps}; + $qps *= 1000 if $metric ne $QPS; # the unit of latency is ms + printf("%-60s %6.4f ${fmt} %s\n", + $result->{name}, $result->{recall}, $qps, $result->{search_param}); +} +if (defined $csv_file) { + open my $fh, '>', $csv_file; + print {$fh} ",Recall,${metric},search_param\n"; + for my $result (@results) { + my $qps = $result->{qps}; + $qps *= 1000 if $metric ne $QPS; + printf {$fh} ( + "%s,%.4f,%.3f,%s\n", $result->{name}, $result->{recall}, + $qps, $result->{search_param} + ); + } +} +print "\n"; +calc_and_print_estimation($results, $metric, \@CONDITIONS); + + + + +sub read_result { + my ($fname) = @_; + open my $fh, '<', $fname; + my %attr; + while (<$fh>) { + chomp; + next if /^\s*$/; + my $pos = index($_, ':'); + $pos != -1 + or die "[error] no ':' is found: '$_'\n"; + my $key = substr($_, 0, $pos); + my $val = substr($_, $pos + 1); + $key =~ s/^\s+|\s+$//g; + $val =~ s/^\s+|\s+$//g; + + # old version benchmark compatible + if ($key eq 'search_time') { + $key = 'average_search_time'; + $val *= $attr{batch_size}; + } + $attr{$key} = $val; + } + return \%attr; +} + +sub overwrite_recall_to_result { + my ($fname, $recall) = @_; + open my $fh_in, '<', $fname; + $recall = sprintf("%f", $recall); + my $out; + while (<$fh_in>) { + s/^recall: .*/recall: $recall/; + $out .= $_; + } + close $fh_in; + + open my $fh_out, '>', $fname; + print {$fh_out} $out; +} + +sub append_recall_to_result { + my ($fname, $recall) = @_; + open my $fh, '>>', $fname; + printf {$fh} ("recall: %f\n", $recall); +} + +sub get_all_results { + my ($metric) = shift @_; + + my %fname; + my $wanted = sub { + if (-f && /\.txt$/) { + $fname{$File::Find::name} = 1; + } + }; + find($wanted, @_); + + my $k; + my $dataset; + my $distance; + my @results; + for my $f (sort keys %fname) { + print STDERR "reading $f ...\n"; + my $attr = read_result($f); + if (!defined $k) { + $k = $attr->{k}; + $dataset = $attr->{dataset}; + $distance = $attr->{distance}; + } + else { + $attr->{k} eq $k + or die "[error] k should be $k, but is $attr->{k} in $f\n"; + $attr->{dataset} eq $dataset + or die + "[error] dataset should be $dataset, but is $attr->{dataset} in $f\n"; + $attr->{distance} eq $distance + or die + "[error] distance 
should be $distance, but is $attr->{distance} in $f\n"; + } + + my $batch_size = $attr->{batch_size}; + $batch_size =~ s/000000$/M/; + $batch_size =~ s/000$/K/; + my $search_param = $attr->{search_param}; + $search_param =~ s/^{//; + $search_param =~ s/}$//; + $search_param =~ s/,/ /g; + $search_param =~ s/"//g; + + my $qps; + if ($metric eq $QPS) { + $qps = $attr->{batch_size} / $attr->{average_search_time}; + } + elsif ($metric eq $AVG_LATENCY) { + $qps = $attr->{average_search_time}; + } + elsif ($metric eq $P99_LATENCY) { + exists $attr->{p99_search_time} + or die "[error] p99_search_time is not found\n"; + $qps = $attr->{p99_search_time}; + } + elsif ($metric eq $P999_LATENCY) { + exists $attr->{p999_search_time} + or die "[error] p999_search_time is not found\n"; + $qps = $attr->{p999_search_time}; + } + else { + die "[error] unknown latency type: '$metric'\n"; + } + my $result = { + file => $f, + name => "$attr->{name}-batch${batch_size}", + search_param => $search_param, + qps => $qps, + }; + + if (exists $attr->{recall}) { + $result->{recall} = $attr->{recall}; + } + push @results, $result; + } + return $k, $dataset, $distance, \@results; +} + +sub read_ibin { + my ($fname) = @_; + + open my $fh, '<:raw', $fname; + my $raw; + + read($fh, $raw, 8); + my ($nrows, $dim) = unpack('LL', $raw); + + my $expected_size = 8 + $nrows * $dim * 4; + my $size = (stat($fh))[7]; + $size == $expected_size + or die( + "[error] expected size is $expected_size, but actual size is $size\n"); + + read($fh, $raw, $nrows * $dim * 4) == $nrows * $dim * 4 + or die "[error] read $fname failed\n"; + my @data = unpack('l' x ($nrows * $dim), $raw); + return \@data, $nrows, $dim; +} + +sub pick_k_neighbors { + my ($neighbors, $nrows, $ncols, $k) = @_; + + my @res; + for my $i (0 .. $nrows - 1) { + my %neighbor_set; + for my $j (0 .. $k - 1) { + $neighbor_set{$neighbors->[$i * $ncols + $j]} = 1; + } + push @res, \%neighbor_set; + } + return \@res; +} + + +sub calc_recall { + my ($truth_k_neighbors, $result_neighbors, $nrows, $k) = @_; + + my $recall = 0; + for my $i (0 .. $nrows - 1) { + my $tp = 0; + for my $j (0 .. 
$k - 1) { + my $neighbor = $result_neighbors->[$i * $k + $j]; + ++$tp if exists $truth_k_neighbors->[$i]{$neighbor}; + } + $recall += $tp; + } + return $recall / $k / $nrows; +} + +sub calc_missing_recall { + my ($results, $truth_file, $force_calc_recall) = @_; + + my $need_calc_recall = grep { !exists $_->{recall} } @$results; + return unless $need_calc_recall || $force_calc_recall; + + my ($truth_neighbors, $nrows, $truth_k) = read_ibin($truth_file); + $truth_k >= $k + or die "[error] ground truth k ($truth_k) < k($k)\n"; + my $truth_k_neighbors = + pick_k_neighbors($truth_neighbors, $nrows, $truth_k, $k); + + for my $result (@$results) { + next if exists $result->{recall} && !$force_calc_recall; + + my $result_bin_file = $result->{file}; + $result_bin_file =~ s/txt$/ibin/; + print STDERR "calculating recall for $result_bin_file ...\n"; + my ($result_neighbors, $result_nrows, $result_k) = + read_ibin($result_bin_file); + $result_k == $k + or die + "[error] k should be $k, but is $result_k in $result_bin_file\n"; + $result_nrows == $nrows + or die + "[error] #row should be $nrows, but is $result_nrows in $result_bin_file\n"; + + my $recall = + calc_recall($truth_k_neighbors, $result_neighbors, $nrows, $k); + if (exists $result->{recall}) { + my $new_value = sprintf("%f", $recall); + if ($result->{recall} ne $new_value) { + print "update recall: $result->{recall} -> $new_value\n"; + overwrite_recall_to_result($result->{file}, $recall); + } + } + else { + append_recall_to_result($result->{file}, $recall); + } + $result->{recall} = $recall; + } +} + + +sub estimate { + my ($results, $condition, $value) = @_; + my %point_of; + for my $result (@$results) { + my $point; + if ($condition eq 'recall') { + $point = [$result->{recall}, $result->{qps}]; + } + else { + $point = [$result->{qps}, $result->{recall}]; + } + push @{$point_of{$result->{name}}}, $point; + } + + my @names = sort keys %point_of; + my @result; + for my $name (@names) { + my @points = sort { $a->[0] <=> $b->[0] } @{$point_of{$name}}; + if ($value < $points[0][0] || $value > $points[$#points][0]) { + push @result, -1; + next; + } + elsif ($value == $points[0][0]) { + push @result, $points[0][1]; + next; + } + + for my $i (1 .. $#points) { + if ($points[$i][0] >= $value) { + push @result, + linear_interpolation($value, @{$points[$i - 1]}, + @{$points[$i]}); + last; + } + } + } + return \@names, \@result; +} + +sub linear_interpolation { + my ($x, $x1, $y1, $x2, $y2) = @_; + return $y1 + ($x - $x1) * ($y2 - $y1) / ($x2 - $x1); +} + +sub merge { + my ($all, $new, $scale) = @_; + @$all == @$new + or die "[error] length is not equal\n"; + for my $i (0 .. @$all - 1) { + push @{$all->[$i]}, $new->[$i] * $scale; + } +} + +sub calc_and_print_estimation { + my ($results, $metric, $conditions) = @_; + + my @conditions = grep { + my $target = $_->[0]; + if ($target eq 'recall' || $target eq $metric) { + 1; + } + else { + $target eq $QPS + || $target eq $AVG_LATENCY + || $target eq $P99_LATENCY + || $target eq $P999_LATENCY + or die "[error] unknown condition: '$target'\n"; + 0; + } + } @$conditions; + + my @headers = map { + my $header; + if ($_->[0] eq 'recall') { + $header = $metric . '@recall' . $_->[1]; + } + elsif ($_->[0] eq $metric) { + $header = 'recall@' . $metric . $_->[1]; + } + $header; + } @conditions; + + my $scale = ($metric eq $QPS) ? 
1 : 1000; + my $estimations; + for my $condition (@conditions) { + my ($names, $estimate) = estimate($results, @$condition); + if (!defined $estimations) { + @$estimations = map { [$_] } @$names; + } + merge($estimations, $estimate, $scale); + } + + my $fmt = "%-60s" . (" %16s" x @headers) . "\n"; + printf($fmt, '', @headers); + $fmt =~ s/16s/16.4f/g; + for (@$estimations) { + printf($fmt, @$_); + } +} diff --git a/python/raft-ann-bench/raft-ann-bench/get_dataset/__main__.py b/python/raft-ann-bench/raft-ann-bench/get_dataset/__main__.py new file mode 100644 index 0000000000..a175384dc3 --- /dev/null +++ b/python/raft-ann-bench/raft-ann-bench/get_dataset/__main__.py @@ -0,0 +1,93 @@ +# +# Copyright (c) 2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import os +import subprocess +from urllib.request import urlretrieve + + +def get_dataset_path(name, ann_bench_data_path): + if not os.path.exists(ann_bench_data_path): + os.mkdir(ann_bench_data_path) + return os.path.join(ann_bench_data_path, f"{name}.hdf5") + + +def download_dataset(url, path): + if not os.path.exists(path): + print(f"downloading {url} -> {path}...") + urlretrieve(url, path) + + +def convert_hdf5_to_fbin(path, normalize): + ann_bench_scripts_dir = os.path.join(os.getenv("RAFT_HOME"), + "cpp/bench/ann/scripts") + ann_bench_scripts_path = os.path.join(ann_bench_scripts_dir, + "hdf5_to_fbin.py") + if normalize and "angular" in path: + p = subprocess.Popen(["python", ann_bench_scripts_path, "-n", + "%s" % path]) + else: + p = subprocess.Popen(["python", ann_bench_scripts_path, + "%s" % path]) + p.wait() + + +def move(name, ann_bench_data_path): + if "angular" in name: + new_name = name.replace("angular", "inner") + else: + new_name = name + new_path = os.path.join(ann_bench_data_path, new_name) + if not os.path.exists(new_path): + os.mkdir(new_path) + for bin_name in ["base.fbin", "query.fbin", "groundtruth.neighbors.ibin", + "groundtruth.distances.fbin"]: + os.rename(f"{ann_bench_data_path}/{name}.{bin_name}", + f"{new_path}/{bin_name}") + + +def download(name, normalize, ann_bench_data_path): + path = get_dataset_path(name, ann_bench_data_path) + try: + url = f"http://ann-benchmarks.com/{name}.hdf5" + download_dataset(url, path) + + convert_hdf5_to_fbin(path, normalize) + + move(name, ann_bench_data_path) + except Exception: + print(f"Cannot download {url}") + raise + + +def main(): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument("--dataset", help="dataset to download", + default="glove-100-angular") + parser.add_argument("--dataset-path", help="path to download dataset", + default=os.path.join(os.getenv("RAFT_HOME"), + "bench", "ann", "data")) + parser.add_argument("--normalize", + help="normalize cosine distance to inner product", + action="store_true") + args = parser.parse_args() + + download(args.dataset, args.normalize, args.dataset_path) + + +if __name__ == "__main__": + main() diff --git 
a/cpp/bench/ann/scripts/fbin_to_f16bin.py b/python/raft-ann-bench/raft-ann-bench/get_dataset/fbin_to_f16bin.py similarity index 64% rename from cpp/bench/ann/scripts/fbin_to_f16bin.py rename to python/raft-ann-bench/raft-ann-bench/get_dataset/fbin_to_f16bin.py index 4ea8988d87..d3a929d581 100755 --- a/cpp/bench/ann/scripts/fbin_to_f16bin.py +++ b/python/raft-ann-bench/raft-ann-bench/get_dataset/fbin_to_f16bin.py @@ -1,15 +1,18 @@ -# ============================================================================= -# Copyright (c) 2020-2023, NVIDIA CORPORATION. # -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at +# Copyright (c) 2023, NVIDIA CORPORATION. # -# http://www.apache.org/licenses/LICENSE-2.0 +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from __future__ import absolute_import from __future__ import division diff --git a/cpp/bench/ann/scripts/hdf5_to_fbin.py b/python/raft-ann-bench/raft-ann-bench/get_dataset/hdf5_to_fbin.py similarity index 81% rename from cpp/bench/ann/scripts/hdf5_to_fbin.py rename to python/raft-ann-bench/raft-ann-bench/get_dataset/hdf5_to_fbin.py index cfeb184ea8..04bdbb5720 100755 --- a/cpp/bench/ann/scripts/hdf5_to_fbin.py +++ b/python/raft-ann-bench/raft-ann-bench/get_dataset/hdf5_to_fbin.py @@ -1,15 +1,18 @@ -# ============================================================================= -# Copyright (c) 2020-2023, NVIDIA CORPORATION. # -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at +# Copyright (c) 2023, NVIDIA CORPORATION. # -# http://www.apache.org/licenses/LICENSE-2.0 +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
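The get_dataset and data_export tooling relies on the same simple binary layout that eval.pl reads back in read_ibin: a header of two uint32 values (row count, then dimension) followed by the row-major payload, float32 for .fbin files and int32 for .ibin files. A minimal NumPy reader sketch under that assumption; the file name below is illustrative only:

    import numpy as np

    def read_bin(path, dtype):
        # Header: uint32 number of rows, uint32 dimension; then rows*dim values.
        with open(path, "rb") as f:
            nrows, dim = np.fromfile(f, dtype=np.uint32, count=2)
            data = np.fromfile(f, dtype=dtype, count=int(nrows) * int(dim))
        return data.reshape(int(nrows), int(dim))

    # e.g. ground-truth neighbor ids produced by the dataset scripts
    neighbors = read_bin("groundtruth.neighbors.ibin", np.int32)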
+ import sys import numpy as np diff --git a/python/raft-ann-bench/raft-ann-bench/plot/__main__.py b/python/raft-ann-bench/raft-ann-bench/plot/__main__.py new file mode 100644 index 0000000000..0020e398a9 --- /dev/null +++ b/python/raft-ann-bench/raft-ann-bench/plot/__main__.py @@ -0,0 +1,240 @@ +# +# Copyright (c) 2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This script is inspired by +# 1: https://github.com/erikbern/ann-benchmarks/blob/main/plot.py +# 2: https://github.com/erikbern/ann-benchmarks/blob/main/ann_benchmarks/plotting/utils.py +# 3: https://github.com/erikbern/ann-benchmarks/blob/main/ann_benchmarks/plotting/metrics.py +# Licence: https://github.com/erikbern/ann-benchmarks/blob/main/LICENSE + +import matplotlib as mpl + +mpl.use("Agg") # noqa +import argparse +import itertools +import matplotlib.pyplot as plt +import numpy as np +import os + + + +metrics = { + "k-nn": { + "description": "Recall", + "worst": float("-inf"), + "lim": [0.0, 1.03], + }, + "qps": { + "description": "Queries per second (1/s)", + "worst": float("-inf"), + } +} + + +def generate_n_colors(n): + vs = np.linspace(0.3, 0.9, 7) + colors = [(0.9, 0.4, 0.4, 1.0)] + + def euclidean(a, b): + return sum((x - y) ** 2 for x, y in zip(a, b)) + + while len(colors) < n: + new_color = max(itertools.product(vs, vs, vs), key=lambda a: min(euclidean(a, b) for b in colors)) + colors.append(new_color + (1.0,)) + return colors + + +def create_linestyles(unique_algorithms): + colors = dict(zip(unique_algorithms, generate_n_colors(len(unique_algorithms)))) + linestyles = dict((algo, ["--", "-.", "-", ":"][i % 4]) for i, algo in enumerate(unique_algorithms)) + markerstyles = dict((algo, ["+", "<", "o", "*", "x"][i % 5]) for i, algo in enumerate(unique_algorithms)) + faded = dict((algo, (r, g, b, 0.3)) for algo, (r, g, b, a) in colors.items()) + return dict((algo, (colors[algo], faded[algo], linestyles[algo], markerstyles[algo])) for algo in unique_algorithms) + + +def get_up_down(metric): + if metric["worst"] == float("inf"): + return "down" + return "up" + + +def get_left_right(metric): + if metric["worst"] == float("inf"): + return "left" + return "right" + + +def get_plot_label(xm, ym): + template = "%(xlabel)s-%(ylabel)s tradeoff - %(updown)s and" " to the %(leftright)s is better" + return template % { + "xlabel": xm["description"], + "ylabel": ym["description"], + "updown": get_up_down(ym), + "leftright": get_left_right(xm), + } + + +def create_pointset(data, xn, yn): + xm, ym = (metrics[xn], metrics[yn]) + rev_y = -1 if ym["worst"] < 0 else 1 + rev_x = -1 if xm["worst"] < 0 else 1 + data.sort(key=lambda t: (rev_y * t[-1], rev_x * t[-2])) + + axs, ays, als = [], [], [] + # Generate Pareto frontier + xs, ys, ls = [], [], [] + last_x = xm["worst"] + comparator = (lambda xv, lx: xv > lx) if last_x < 0 else (lambda xv, lx: xv < lx) + for algo_name, xv, yv in data: + if not xv or not yv: + continue + axs.append(xv) + ays.append(yv) + als.append(algo_name) + if 
comparator(xv, last_x): + last_x = xv + xs.append(xv) + ys.append(yv) + ls.append(algo_name) + return xs, ys, ls, axs, ays, als + + +def create_plot(all_data, raw, x_scale, y_scale, fn_out, linestyles): + xn = "k-nn" + yn = "qps" + xm, ym = (metrics[xn], metrics[yn]) + # Now generate each plot + handles = [] + labels = [] + plt.figure(figsize=(12, 9)) + + # Sorting by mean y-value helps aligning plots with labels + def mean_y(algo): + xs, ys, ls, axs, ays, als = create_pointset(all_data[algo], xn, yn) + return -np.log(np.array(ys)).mean() + + # Find range for logit x-scale + min_x, max_x = 1, 0 + for algo in sorted(all_data.keys(), key=mean_y): + xs, ys, ls, axs, ays, als = create_pointset(all_data[algo], xn, yn) + min_x = min([min_x] + [x for x in xs if x > 0]) + max_x = max([max_x] + [x for x in xs if x < 1]) + color, faded, linestyle, marker = linestyles[algo] + (handle,) = plt.plot( + xs, ys, "-", label=algo, color=color, ms=7, mew=3, lw=3, marker=marker + ) + handles.append(handle) + if raw: + (handle2,) = plt.plot( + axs, ays, "-", label=algo, color=faded, ms=5, mew=2, lw=2, marker=marker + ) + labels.append(algo) + + ax = plt.gca() + ax.set_ylabel(ym["description"]) + ax.set_xlabel(xm["description"]) + # Custom scales of the type --x-scale a3 + if x_scale[0] == "a": + alpha = float(x_scale[1:]) + + def fun(x): + return 1 - (1 - x) ** (1 / alpha) + + def inv_fun(x): + return 1 - (1 - x) ** alpha + + ax.set_xscale("function", functions=(fun, inv_fun)) + if alpha <= 3: + ticks = [inv_fun(x) for x in np.arange(0, 1.2, 0.2)] + plt.xticks(ticks) + if alpha > 3: + from matplotlib import ticker + + ax.xaxis.set_major_formatter(ticker.LogitFormatter()) + # plt.xticks(ticker.LogitLocator().tick_values(min_x, max_x)) + plt.xticks([0, 1 / 2, 1 - 1e-1, 1 - 1e-2, 1 - 1e-3, 1 - 1e-4, 1]) + # Other x-scales + else: + ax.set_xscale(x_scale) + ax.set_yscale(y_scale) + ax.set_title(get_plot_label(xm, ym)) + plt.gca().get_position() + # plt.gca().set_position([box.x0, box.y0, box.width * 0.8, box.height]) + ax.legend(handles, labels, loc="center left", bbox_to_anchor=(1, 0.5), prop={"size": 9}) + plt.grid(visible=True, which="major", color="0.65", linestyle="-") + plt.setp(ax.get_xminorticklabels(), visible=True) + + # Logit scale has to be a subset of (0,1) + if "lim" in xm and x_scale != "logit": + x0, x1 = xm["lim"] + plt.xlim(max(x0, 0), min(x1, 1)) + elif x_scale == "logit": + plt.xlim(min_x, max_x) + if "lim" in ym: + plt.ylim(ym["lim"]) + + # Workaround for bug https://github.com/matplotlib/matplotlib/issues/6789 + ax.spines["bottom"]._adjust_location() + + plt.savefig(fn_out, bbox_inches="tight") + plt.close() + + +def load_all_results(result_filepath): + results = dict() + with open(result_filepath, 'r') as f: + for line in f.readlines()[1:]: + split_lines = line.split(',') + algo_name = split_lines[0].split('.')[0] + if algo_name not in results: + results[algo_name] = [] + results[algo_name].append([algo_name, float(split_lines[1]), + float(split_lines[2])]) + return results + + +def main(): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument("--result-csv", help="Path to CSV Results", required=True) + parser.add_argument("--output", help="Path to the PNG output file", + default=f"{os.getcwd()}/out.png") + parser.add_argument( + "--x-scale", + help="Scale to use when drawing the X-axis. 
\ + Typically linear, logit or a2", + default="linear" + ) + parser.add_argument( + "--y-scale", + help="Scale to use when drawing the Y-axis", + choices=["linear", "log", "symlog", "logit"], + default="linear", + ) + parser.add_argument( + "--raw", help="Show raw results (not just Pareto frontier) in faded colours", action="store_true" + ) + args = parser.parse_args() + + print(f"writing output to {args.output}") + + results = load_all_results(args.result_csv) + linestyles = create_linestyles(sorted(results.keys())) + + create_plot(results, args.raw, args.x_scale, args.y_scale, args.output, linestyles) + + +if __name__ == "__main__": + main() diff --git a/python/raft-ann-bench/raft-ann-bench/run/__main__.py b/python/raft-ann-bench/raft-ann-bench/run/__main__.py new file mode 100644 index 0000000000..60b9a012ad --- /dev/null +++ b/python/raft-ann-bench/raft-ann-bench/run/__main__.py @@ -0,0 +1,234 @@ +# +# Copyright (c) 2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import json +import os +import subprocess +import yaml + + +def positive_int(input_str: str) -> int: + try: + i = int(input_str) + if i < 1: + raise ValueError + except ValueError: + raise argparse.ArgumentTypeError(f"{input_str} is not a positive integer") + + return i + +def validate_algorithm(algos_conf, algo): + algos_conf_keys = set(algos_conf.keys()) + return algo in algos_conf_keys and not algos_conf[algo]["disabled"] + + +def find_executable(algos_conf, algo): + executable = algos_conf[algo]["executable"] + conda_path = os.path.join(os.getenv("CONDA_PREFIX"), "bin", "ann", + executable) + build_path = os.path.join(os.getenv("RAFT_HOME"), "cpp", "build", executable) + if os.path.exists(conda_path): + return (executable, conda_path) + elif os.path.exists(build_path): + return (executable, build_path) + else: + raise FileNotFoundError(executable) + + +def run_build_and_search(conf_filename, conf_file, executables_to_run, + force, conf_filedir, build, search, k, batch_size): + for executable, ann_executable_path in executables_to_run.keys(): + # Need to write temporary configuration + temp_conf_filename = f"temporary_executable_{conf_filename}" + temp_conf_filepath = os.path.join(conf_filedir, temp_conf_filename) + with open(temp_conf_filepath, "w") as f: + temp_conf = dict() + temp_conf["dataset"] = conf_file["dataset"] + temp_conf["search_basic_param"] = conf_file["search_basic_param"] + temp_conf["index"] = executables_to_run[(executable, + ann_executable_path)]["index"] + json.dump(temp_conf, f) + + if build: + if force: + p = subprocess.Popen([ann_executable_path, "--build", "--overwrite", + temp_conf_filepath]) + p.wait() + else: + p = subprocess.Popen([ann_executable_path, "--build", + temp_conf_filepath]) + p.wait() + + if search: + legacy_result_folder = "result/" + temp_conf["dataset"]["name"] + os.makedirs(legacy_result_folder, exist_ok=True) + p = subprocess.Popen([ + ann_executable_path, + "--search", + "--benchmark_counters_tabular", + "--benchmark_out_format=json", + 
"--override_kv=k:%s" % k, + "--override_kv=n_queries:%s" % batch_size, + f"--benchmark_out={legacy_result_folder}/{executable}.json", + temp_conf_filepath]) + p.wait() + + os.remove(temp_conf_filepath) + + +def main(): + scripts_path = os.path.dirname(os.path.realpath(__file__)) + # Read list of allowed algorithms + with open(f"{scripts_path}/algos.yaml", "r") as f: + algos_conf = yaml.safe_load(f) + + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + + parser.add_argument( + "-k", "--count", default=10, type=positive_int, help="the number of nearest neighbors to search for" + ) + parser.add_argument( + "-bs", "--batch-size", default=10000, type=positive_int, help="number of query vectors to use in each query trial" + ) + parser.add_argument( + "--configuration", + help="path to configuration file for a dataset", + ) + parser.add_argument( + "--dataset", + help="dataset whose configuration file will be used", + default="glove-100-inner" + ) + parser.add_argument( + "--dataset-path", + help="path to dataset folder", + default=os.path.join(os.getenv("RAFT_HOME"), + "bench", "ann", "data") + ) + parser.add_argument( + "--build", + action="store_true" + ) + parser.add_argument( + "--search", + action="store_true" + ) + parser.add_argument("--algorithms", + help="run only comma separated list of named \ + algorithms", + default=None) + parser.add_argument("--indices", + help="run only comma separated list of named indices. \ + parameter `algorithms` is ignored", + default=None) + parser.add_argument("-f", "--force", + help="re-run algorithms even if their results \ + already exist", + action="store_true") + + args = parser.parse_args() + + # If both build and search are not provided, + # run both + if not args.build and not args.search: + build = True + search = True + else: + build = args.build + search = args.search + + k = args.count + batch_size = args.batch_size + + # Read configuration file associated to dataset + if args.configuration: + conf_filepath = args.configuration + else: + conf_filepath = os.path.join(scripts_path, "conf", f"{args.dataset}.json") + conf_filename = conf_filepath.split("/")[-1] + conf_filedir = "/".join(conf_filepath.split("/")[:-1]) + dataset_name = conf_filename.replace(".json", "") + dataset_path = os.path.realpath(os.path.join(args.dataset_path, dataset_name)) + if not os.path.exists(conf_filepath): + raise FileNotFoundError(conf_filename) + + with open(conf_filepath, "r") as f: + conf_file = json.load(f) + + # Replace base, query to dataset-path + conf_file["dataset"]["base_file"] = os.path.join(dataset_path, "base.fbin") + conf_file["dataset"]["query_file"] = os.path.join(dataset_path, "query.fbin") + conf_file["dataset"]["groundtruth_neighbors_file"] = os.path.join(dataset_path, "groundtruth.neighbors.ibin") + # Ensure base and query files exist for dataset + if not os.path.exists(conf_file["dataset"]["base_file"]): + raise FileNotFoundError(conf_file["dataset"]["base_file"]) + if not os.path.exists(conf_file["dataset"]["query_file"]): + raise FileNotFoundError(conf_file["dataset"]["query_file"]) + + executables_to_run = dict() + # At least one named index should exist in config file + if args.indices: + indices = set(args.indices.split(",")) + # algo associated with index should still be present in algos.yaml + # and enabled + for index in conf_file["index"]: + curr_algo = index["algo"] + if index["name"] in indices and \ + validate_algorithm(algos_conf, curr_algo): + executable_path = find_executable(algos_conf, 
curr_algo) + if executable_path not in executables_to_run: + executables_to_run[executable_path] = {"index": []} + executables_to_run[executable_path]["index"].append(index) + + # switch to named algorithms if indices parameter is not supplied + elif args.algorithms: + algorithms = set(args.algorithms.split(",")) + # pick out algorithms from conf file that exist + # and are enabled in algos.yaml + for index in conf_file["index"]: + curr_algo = index["algo"] + if curr_algo in algorithms and \ + validate_algorithm(algos_conf, curr_algo): + executable_path = find_executable(algos_conf, curr_algo) + if executable_path not in executables_to_run: + executables_to_run[executable_path] = {"index": []} + executables_to_run[executable_path]["index"].append(index) + + # default, try to run all available algorithms + else: + for index in conf_file["index"]: + curr_algo = index["algo"] + if validate_algorithm(algos_conf, curr_algo): + executable_path = find_executable(algos_conf, curr_algo) + if executable_path not in executables_to_run: + executables_to_run[executable_path] = {"index": []} + executables_to_run[executable_path]["index"].append(index) + + # Replace build, search to dataset path + for executable_path in executables_to_run: + for pos, index in enumerate(executables_to_run[executable_path]["index"]): + index["file"] = os.path.join(dataset_path, "index", index["name"]) + index["search_result_file"] = \ + os.path.join(dataset_path, "result", index["name"]) + executables_to_run[executable_path]["index"][pos] = index + + run_build_and_search(conf_filename, conf_file, executables_to_run, + args.force, conf_filedir, build, search, k, batch_size) + + +if __name__ == "__main__": + main() diff --git a/python/raft-ann-bench/raft-ann-bench/run/algos-cpu.yaml b/python/raft-ann-bench/raft-ann-bench/run/algos-cpu.yaml new file mode 100644 index 0000000000..cb63d0920c --- /dev/null +++ b/python/raft-ann-bench/raft-ann-bench/run/algos-cpu.yaml @@ -0,0 +1,30 @@ +faiss_gpu_ivf_flat: + executable: FAISS_IVF_FLAT_ANN_BENCH + disabled: true +faiss_gpu_flat: + executable: FAISS_IVF_FLAT_ANN_BENCH + disabled: true +faiss_gpu_ivf_pq: + executable: FAISS_IVF_PQ_ANN_BENCH + disabled: true +faiss_gpu_ivf_sq: + executable: FAISS_IVF_PQ_ANN_BENCH + disabled: true +faiss_gpu_bfknn: + executable: FAISS_BFKNN_ANN_BENCH + disabled: true +raft_ivf_flat: + executable: RAFT_IVF_FLAT_ANN_BENCH + disabled: true +raft_ivf_pq: + executable: RAFT_IVF_PQ_ANN_BENCH + disabled: true +raft_cagra: + executable: RAFT_CAGRA_ANN_BENCH + disabled: true +ggnn: + executable: GGNN_ANN_BENCH + disabled: true +hnswlib: + executable: HNSWLIB_ANN_BENCH + disabled: false diff --git a/python/raft-ann-bench/raft-ann-bench/run/algos.yaml b/python/raft-ann-bench/raft-ann-bench/run/algos.yaml new file mode 100644 index 0000000000..5f554fc46b --- /dev/null +++ b/python/raft-ann-bench/raft-ann-bench/run/algos.yaml @@ -0,0 +1,30 @@ +faiss_gpu_ivf_flat: + executable: FAISS_IVF_FLAT_ANN_BENCH + disabled: false +faiss_gpu_flat: + executable: FAISS_IVF_FLAT_ANN_BENCH + disabled: false +faiss_gpu_ivf_pq: + executable: FAISS_IVF_PQ_ANN_BENCH + disabled: false +faiss_gpu_ivf_sq: + executable: FAISS_IVF_PQ_ANN_BENCH + disabled: false +faiss_gpu_bfknn: + executable: FAISS_BFKNN_ANN_BENCH + disabled: false +raft_ivf_flat: + executable: RAFT_IVF_FLAT_ANN_BENCH + disabled: false +raft_ivf_pq: + executable: RAFT_IVF_PQ_ANN_BENCH + disabled: false +raft_cagra: + executable: RAFT_CAGRA_ANN_BENCH + disabled: false +ggnn: + executable: GGNN_ANN_BENCH + disabled: false 
+hnswlib: + executable: HNSWLIB_ANN_BENCH + disabled: false \ No newline at end of file diff --git a/python/raft-ann-bench/raft-ann-bench/run/conf/bigann-100M.json b/python/raft-ann-bench/raft-ann-bench/run/conf/bigann-100M.json new file mode 100644 index 0000000000..0e59936f0e --- /dev/null +++ b/python/raft-ann-bench/raft-ann-bench/run/conf/bigann-100M.json @@ -0,0 +1,202 @@ +{ + "dataset" : { + "name" : "bigann-100M", + "base_file" : "data/bigann-1B/base.1B.u8bin", + "subset_size" : 100000000, + "query_file" : "data/bigann-1B/query.public.10K.u8bin", + "distance" : "euclidean" + }, + + "search_basic_param" : { + "batch_size" : 10000, + "k" : 10, + "run_count" : 2 + }, + + "index" : [ + { + "name": "raft_ivf_pq.dimpq64-cluster5K-float-float", + "algo": "raft_ivf_pq", + "build_param": { + "niter": 25, + "nlist": 5000, + "pq_dim": 64, + "ratio": 10 + }, + "file": "index/bigann-100M/raft_ivf_pq/dimpq64-cluster5K", + "search_params": [ + { + "numProbes": 20, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "numProbes": 30, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "numProbes": 40, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "numProbes": 1000, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/bigann-100M/raft_ivf_pq/dimpq64-cluster5K-float-float" + }, + { + "name" : "hnswlib.M12", + "algo" : "hnswlib", + "build_param": {"M":12, "efConstruction":500, "numThreads":32}, + "file" : "index/bigann-100M/hnswlib/M12", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/bigann-100M/hnswlib/M12" + }, + { + "name" : "hnswlib.M16", + "algo" : "hnswlib", + "build_param": {"M":16, "efConstruction":500, "numThreads":32}, + "file" : "index/bigann-100M/hnswlib/M16", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/bigann-100M/hnswlib/M16" + }, + { + "name" : "hnswlib.M24", + "algo" : "hnswlib", + "build_param": {"M":24, "efConstruction":500, "numThreads":32}, + "file" : "index/bigann-100M/hnswlib/M24", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/bigann-100M/hnswlib/M24" + }, + { + "name" : "hnswlib.M36", + "algo" : "hnswlib", + "build_param": {"M":36, "efConstruction":500, "numThreads":32}, + "file" : 
"index/bigann-100M/hnswlib/M36", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/bigann-100M/hnswlib/M36" + }, + + + { + "name" : "raft_ivf_flat.nlist100K", + "algo" : "raft_ivf_flat", + "build_param": { + "nlist" : 100000, + "niter" : 25, + "ratio" : 5 + }, + "file" : "index/bigann-100M/raft_ivf_flat/nlist100K", + "search_params" : [ + {"nprobe":20}, + {"nprobe":30}, + {"nprobe":40}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ], + "search_result_file" : "result/bigann-100M/raft_ivf_flat/nlist100K" + }, + + { + "name" : "raft_cagra.dim32", + "algo" : "raft_cagra", + "build_param": { + "index_dim" : 32 + }, + "file" : "index/bigann-100M/raft_cagra/dim32", + "search_params" : [ + {"itopk": 32}, + {"itopk": 64}, + {"itopk": 128} + ], + "search_result_file" : "result/bigann-100M/raft_cagra/dim32" + }, + + + { + "name" : "raft_cagra.dim64", + "algo" : "raft_cagra", + "build_param": { + "index_dim" : 64 + }, + "file" : "index/bigann-100M/raft_cagra/dim64", + "search_params" : [ + {"itopk": 32}, + {"itopk": 64}, + {"itopk": 128} + ], + "search_result_file" : "result/bigann-100M/raft_cagra/dim64" + } + ] +} diff --git a/python/raft-ann-bench/raft-ann-bench/run/conf/deep-100M.json b/python/raft-ann-bench/raft-ann-bench/run/conf/deep-100M.json new file mode 100644 index 0000000000..2f2ec92489 --- /dev/null +++ b/python/raft-ann-bench/raft-ann-bench/run/conf/deep-100M.json @@ -0,0 +1,849 @@ +{ + "dataset" : { + "name" : "deep-100M", + "base_file" : "data/deep-1B/base.1B.fbin", + "subset_size" : 100000000, + "query_file" : "data/deep-1B/query.public.10K.fbin", + "distance" : "euclidean" + }, + + "search_basic_param" : { + "batch_size" : 10000, + "k" : 10, + "run_count" : 2 + }, + + "index" : [ + { + "name" : "hnswlib.M12", + "algo" : "hnswlib", + "build_param": {"M":12, "efConstruction":500, "numThreads":32}, + "file" : "index/deep-100M/hnswlib/M12", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/deep-100M/hnswlib/M12" + }, + { + "name" : "hnswlib.M16", + "algo" : "hnswlib", + "build_param": {"M":16, "efConstruction":500, "numThreads":32}, + "file" : "index/deep-100M/hnswlib/M16", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/deep-100M/hnswlib/M16" + }, + { + "name" : "hnswlib.M24", + "algo" : "hnswlib", + "build_param": {"M":24, "efConstruction":500, "numThreads":32}, + "file" : "index/deep-100M/hnswlib/M24", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, 
"numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/deep-100M/hnswlib/M24" + }, + { + "name" : "hnswlib.M36", + "algo" : "hnswlib", + "build_param": {"M":36, "efConstruction":500, "numThreads":32}, + "file" : "index/deep-100M/hnswlib/M36", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/deep-100M/hnswlib/M36" + }, + { + "name" : "faiss_ivf_flat.nlist50K", + "algo" : "faiss_gpu_ivf_flat", + "build_param": {"nlist":50000}, + "file" : "index/deep-100M/faiss_ivf_flat/nlist50K", + "search_params" : [ + {"nprobe":20}, + {"nprobe":30}, + {"nprobe":40}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ], + "search_result_file" : "result/deep-100M/faiss_ivf_flat/nlist50K" + }, + { + "name" : "faiss_ivf_flat.nlist100K", + "algo" : "faiss_gpu_ivf_flat", + "build_param": {"nlist":100000}, + "file" : "index/deep-100M/faiss_ivf_flat/nlist100K", + "search_params" : [ + {"nprobe":20}, + {"nprobe":30}, + {"nprobe":40}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ], + "search_result_file" : "result/deep-100M/faiss_ivf_flat/nlist100K" + }, + { + "name" : "faiss_ivf_flat.nlist200K", + "algo" : "faiss_gpu_ivf_flat", + "build_param": {"nlist":200000}, + "file" : "index/deep-100M/faiss_ivf_flat/nlist200K", + "search_params" : [ + {"nprobe":20}, + {"nprobe":30}, + {"nprobe":40}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ], + "search_result_file" : "result/deep-100M/faiss_ivf_flat/nlist200K" + }, + + + { + "name" : "faiss_ivf_pq.M48-nlist16K", + "algo" : "faiss_gpu_ivf_pq", + "build_param": {"nlist":16384, "M":48}, + "file" : "index/deep-100M/faiss_ivf_pq/M48-nlist16K", + "search_params" : [ + {"nprobe":10}, + {"nprobe":20}, + {"nprobe":30}, + {"nprobe":40}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500} + ], + "search_result_file" : "result/deep-100M/faiss_ivf_pq/M48-nlist16K" + }, + { + "name" : "faiss_ivf_pq.M48-nlist50K", + "algo" : "faiss_gpu_ivf_pq", + "build_param": {"nlist":50000, "M":48}, + "file" : "index/deep-100M/faiss_ivf_pq/M48-nlist50K", + "search_params" : [ + {"nprobe":20}, + {"nprobe":30}, + {"nprobe":40}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ], + "search_result_file" : "result/deep-100M/faiss_ivf_pq/M48-nlist50K" + }, + { + "name" : "faiss_ivf_pq.M48-nlist100K", + "algo" : "faiss_gpu_ivf_pq", + "build_param": {"nlist":100000, "M":48}, + "file" : "index/deep-100M/faiss_ivf_pq/M48-nlist100K", + "search_params" : [ + {"nprobe":20}, + {"nprobe":30}, + {"nprobe":40}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ], + "search_result_file" : "result/deep-100M/faiss_ivf_pq/M48-nlist100K" + }, + + + { + "name" : "raft_ivf_flat.nlist10K", + "algo" : "raft_ivf_flat", + "build_param": { + "nlist" : 10000, + "niter" : 25, + "ratio" : 5 + }, + "file" : "index/deep-100M/raft_ivf_flat/nlist10K", + "search_params" : [ + {"nprobe":5}, + {"nprobe":10}, + {"nprobe":20}, + {"nprobe":30}, + {"nprobe":40}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ], + 
"search_result_file" : "result/deep-100M/raft_ivf_flat/nlist10K" + }, + { + "name" : "raft_ivf_flat.nlist100K", + "algo" : "raft_ivf_flat", + "build_param": { + "nlist" : 100000, + "niter" : 25, + "ratio" : 5 + }, + "file" : "index/deep-100M/raft_ivf_flat/nlist100K", + "search_params" : [ + {"nprobe":5}, + {"nprobe":10}, + {"nprobe":20}, + {"nprobe":30}, + {"nprobe":40}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ], + "search_result_file" : "result/deep-100M/raft_ivf_flat/nlist100K" + }, + + { + "name" : "raft_ivf_pq.nlist10K", + "algo" : "raft_ivf_pq", + "build_param": { + "nlist" : 10000, + "niter" : 25, + "ratio" : 5 + }, + "file" : "index/deep-100M/raft_ivf_pq/nlist10K", + "search_params" : [ + {"nprobe":3}, + {"nprobe":10}, + {"nprobe":20}, + {"nprobe":30}, + {"nprobe":40}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ], + "search_result_file" : "result/deep-100M/raft_ivf_pq/nlist10K" + }, + { + "name" : "raft_ivf_pq.nlist10Kdim64", + "algo" : "raft_ivf_pq", + "build_param": { + "nlist" : 10000, + "niter" : 25, + "ratio" : 5, + "pq_dim": 64 + }, + "file" : "index/deep-100M/raft_ivf_pq/nlist10Kdim64", + "search_params" : [ + {"nprobe":5}, + {"nprobe":10}, + {"nprobe":20}, + {"nprobe":30}, + {"nprobe":40}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ], + "search_result_file" : "result/deep-100M/raft_ivf_pq/nlist10Kdim64" + }, + { + "name" : "raft_ivf_pq.nlist10Kdim32", + "algo" : "raft_ivf_pq", + "build_param": { + "nlist" : 10000, + "niter" : 25, + "ratio" : 5, + "pq_dim": 32 + }, + "file" : "index/deep-100M/raft_ivf_pq/nlist10Kdim32", + "search_params" : [ + {"nprobe":5}, + {"nprobe":10}, + {"nprobe":20}, + {"nprobe":30}, + {"nprobe":40}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ], + "search_result_file" : "result/deep-100M/raft_ivf_pq/nlist10Kdim32" + }, + { + "name" : "raft_ivf_pq.nlist100K", + "algo" : "raft_ivf_pq", + "build_param": { + "nlist" : 100000, + "niter" : 25, + "ratio" : 5 + }, + "file" : "index/deep-100M/raft_ivf_pq/nlist100K", + "search_params" : [ + {"nprobe":5}, + {"nprobe":10}, + {"nprobe":20}, + {"nprobe":30}, + {"nprobe":40}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ], + "search_result_file" : "result/deep-100M/raft_ivf_pq/nlist100K" + }, + { + "name" : "raft_ivf_pq.nlist100Kdim64", + "algo" : "raft_ivf_pq", + "build_param": { + "nlist" : 100000, + "niter" : 25, + "ratio" : 5, + "pq_dim": 64 + }, + "file" : "index/deep-100M/raft_ivf_pq/nlist100Kdim64", + "search_params" : [ + {"nprobe":5}, + {"nprobe":10}, + {"nprobe":20}, + {"nprobe":30}, + {"nprobe":40}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ], + "search_result_file" : "result/deep-100M/raft_ivf_pq/nlist100Kdim64" + }, + { + "name" : "raft_ivf_pq.nlist100Kdim32", + "algo" : "raft_ivf_pq", + "build_param": { + "nlist" : 100000, + "niter" : 25, + "ratio" : 5, + "pq_dim": 32 + }, + "file" : "index/deep-100M/raft_ivf_pq/nlist100Kdim32", + "search_params" : [ + {"nprobe":5}, + {"nprobe":10}, + {"nprobe":20}, + {"nprobe":30}, + {"nprobe":40}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ], + "search_result_file" : "result/deep-100M/raft_ivf_pq/nlist100Kdim32" + }, + + { + "name" : "raft_cagra.dim32", + "algo" : "raft_cagra", + "build_param": { + "index_dim": 32, + 
"intermediate_graph_degree": 48 + }, + "file": "index/deep-100M/raft_cagra/dim32", + "search_params" : [ + { + "itopk": 32, + "search_width": 1, + "max_iterations": 0, + "algo": "single_cta" + }, + { + "itopk": 32, + "search_width": 1, + "max_iterations": 32, + "algo": "single_cta" + }, + { + "itopk": 64, + "search_width": 4, + "max_iterations": 16, + "algo": "single_cta" + }, + { + "itopk": 64, + "search_width": 1, + "max_iterations": 64, + "algo": "single_cta" + }, + { + "itopk": 96, + "search_width": 2, + "max_iterations": 48, + "algo": "single_cta" + }, + { + "itopk": 128, + "search_width": 8, + "max_iterations": 16, + "algo": "single_cta" + }, + { + "itopk": 128, + "search_width": 2, + "max_iterations": 64, + "algo": "single_cta" + }, + { + "itopk": 192, + "search_width": 8, + "max_iterations": 24, + "algo": "single_cta" + }, + { + "itopk": 192, + "search_width": 2, + "max_iterations": 96, + "algo": "single_cta" + }, + { + "itopk": 256, + "search_width": 8, + "max_iterations": 32, + "algo": "single_cta" + }, + { + "itopk": 384, + "search_width": 8, + "max_iterations": 48, + "algo": "single_cta" + }, + { + "itopk": 512, + "search_width": 8, + "max_iterations": 64, + "algo": "single_cta" + }, + + { + "itopk": 32, + "search_width": 1, + "max_iterations": 0, + "algo": "multi_cta" + }, + { + "itopk": 32, + "search_width": 1, + "max_iterations": 32, + "algo": "multi_cta" + }, + { + "itopk": 64, + "search_width": 4, + "max_iterations": 16, + "algo": "multi_cta" + }, + { + "itopk": 64, + "search_width": 1, + "max_iterations": 64, + "algo": "multi_cta" + }, + { + "itopk": 96, + "search_width": 2, + "max_iterations": 48, + "algo": "multi_cta" + }, + { + "itopk": 128, + "search_width": 8, + "max_iterations": 16, + "algo": "multi_cta" + }, + { + "itopk": 128, + "search_width": 2, + "max_iterations": 64, + "algo": "multi_cta" + }, + { + "itopk": 192, + "search_width": 8, + "max_iterations": 24, + "algo": "multi_cta" + }, + { + "itopk": 192, + "search_width": 2, + "max_iterations": 96, + "algo": "multi_cta" + }, + { + "itopk": 256, + "search_width": 8, + "max_iterations": 32, + "algo": "multi_cta" + }, + { + "itopk": 384, + "search_width": 8, + "max_iterations": 48, + "algo": "multi_cta" + }, + { + "itopk": 512, + "search_width": 8, + "max_iterations": 64, + "algo": "multi_cta" + }, + + { + "itopk": 32, + "search_width": 1, + "max_iterations": 0, + "algo": "multi_kernel" + }, + { + "itopk": 32, + "search_width": 1, + "max_iterations": 32, + "algo": "multi_kernel" + }, + { + "itopk": 64, + "search_width": 4, + "max_iterations": 16, + "algo": "multi_kernel" + }, + { + "itopk": 64, + "search_width": 1, + "max_iterations": 64, + "algo": "multi_kernel" + }, + { + "itopk": 96, + "search_width": 2, + "max_iterations": 48, + "algo": "multi_kernel" + }, + { + "itopk": 128, + "search_width": 8, + "max_iterations": 16, + "algo": "multi_kernel" + }, + { + "itopk": 128, + "search_width": 2, + "max_iterations": 64, + "algo": "multi_kernel" + }, + { + "itopk": 192, + "search_width": 8, + "max_iterations": 24, + "algo": "multi_kernel" + }, + { + "itopk": 192, + "search_width": 2, + "max_iterations": 96, + "algo": "multi_kernel" + }, + { + "itopk": 256, + "search_width": 8, + "max_iterations": 32, + "algo": "multi_kernel" + }, + { + "itopk": 384, + "search_width": 8, + "max_iterations": 48, + "algo": "multi_kernel" + }, + { + "itopk": 512, + "search_width": 8, + "max_iterations": 64, + "algo": "multi_kernel" + } + ], + "search_result_file": "result/deep-100M/raft_cagra/dim32" + }, + { + "name" : "raft_cagra.dim64", 
+ "algo" : "raft_cagra", + "build_param": { + "index_dim": 64 + }, + "file": "index/deep-100M/raft_cagra/dim64", + "search_params" : [ + { + "itopk": 32, + "search_width": 1, + "max_iterations": 0, + "algo": "single_cta" + }, + { + "itopk": 32, + "search_width": 1, + "max_iterations": 32, + "algo": "single_cta" + }, + { + "itopk": 64, + "search_width": 4, + "max_iterations": 16, + "algo": "single_cta" + }, + { + "itopk": 64, + "search_width": 1, + "max_iterations": 64, + "algo": "single_cta" + }, + { + "itopk": 96, + "search_width": 2, + "max_iterations": 48, + "algo": "single_cta" + }, + { + "itopk": 128, + "search_width": 8, + "max_iterations": 16, + "algo": "single_cta" + }, + { + "itopk": 128, + "search_width": 2, + "max_iterations": 64, + "algo": "single_cta" + }, + { + "itopk": 192, + "search_width": 8, + "max_iterations": 24, + "algo": "single_cta" + }, + { + "itopk": 192, + "search_width": 2, + "max_iterations": 96, + "algo": "single_cta" + }, + { + "itopk": 256, + "search_width": 8, + "max_iterations": 32, + "algo": "single_cta" + }, + { + "itopk": 384, + "search_width": 8, + "max_iterations": 48, + "algo": "single_cta" + }, + { + "itopk": 512, + "search_width": 8, + "max_iterations": 64, + "algo": "single_cta" + }, + + { + "itopk": 32, + "search_width": 1, + "max_iterations": 0, + "algo": "multi_cta" + }, + { + "itopk": 32, + "search_width": 1, + "max_iterations": 32, + "algo": "multi_cta" + }, + { + "itopk": 64, + "search_width": 4, + "max_iterations": 16, + "algo": "multi_cta" + }, + { + "itopk": 64, + "search_width": 1, + "max_iterations": 64, + "algo": "multi_cta" + }, + { + "itopk": 96, + "search_width": 2, + "max_iterations": 48, + "algo": "multi_cta" + }, + { + "itopk": 128, + "search_width": 8, + "max_iterations": 16, + "algo": "multi_cta" + }, + { + "itopk": 128, + "search_width": 2, + "max_iterations": 64, + "algo": "multi_cta" + }, + { + "itopk": 192, + "search_width": 8, + "max_iterations": 24, + "algo": "multi_cta" + }, + { + "itopk": 192, + "search_width": 2, + "max_iterations": 96, + "algo": "multi_cta" + }, + { + "itopk": 256, + "search_width": 8, + "max_iterations": 32, + "algo": "multi_cta" + }, + { + "itopk": 384, + "search_width": 8, + "max_iterations": 48, + "algo": "multi_cta" + }, + { + "itopk": 512, + "search_width": 8, + "max_iterations": 64, + "algo": "multi_cta" + }, + + { + "itopk": 32, + "search_width": 1, + "max_iterations": 0, + "algo": "multi_kernel" + }, + { + "itopk": 32, + "search_width": 1, + "max_iterations": 32, + "algo": "multi_kernel" + }, + { + "itopk": 64, + "search_width": 4, + "max_iterations": 16, + "algo": "multi_kernel" + }, + { + "itopk": 64, + "search_width": 1, + "max_iterations": 64, + "algo": "multi_kernel" + }, + { + "itopk": 96, + "search_width": 2, + "max_iterations": 48, + "algo": "multi_kernel" + }, + { + "itopk": 128, + "search_width": 8, + "max_iterations": 16, + "algo": "multi_kernel" + }, + { + "itopk": 128, + "search_width": 2, + "max_iterations": 64, + "algo": "multi_kernel" + }, + { + "itopk": 192, + "search_width": 8, + "max_iterations": 24, + "algo": "multi_kernel" + }, + { + "itopk": 192, + "search_width": 2, + "max_iterations": 96, + "algo": "multi_kernel" + }, + { + "itopk": 256, + "search_width": 8, + "max_iterations": 32, + "algo": "multi_kernel" + }, + { + "itopk": 384, + "search_width": 8, + "max_iterations": 48, + "algo": "multi_kernel" + }, + { + "itopk": 512, + "search_width": 8, + "max_iterations": 64, + "algo": "multi_kernel" + } + ], + "search_result_file": "result/deep-100M/raft_cagra/dim64" + } + ] 
+} diff --git a/python/raft-ann-bench/raft-ann-bench/run/conf/deep-1B.json b/python/raft-ann-bench/raft-ann-bench/run/conf/deep-1B.json new file mode 100644 index 0000000000..50d1b87602 --- /dev/null +++ b/python/raft-ann-bench/raft-ann-bench/run/conf/deep-1B.json @@ -0,0 +1,38 @@ +{ + "dataset" : { + "name" : "deep-1B", + "base_file" : "data/deep-1B/base.1B.fbin", + "query_file" : "data/deep-1B/query.public.10K.fbin", + // although distance should be "euclidean", faiss becomes much slower for that + "distance" : "inner_product" + }, + + "search_basic_param" : { + "batch_size" : 10000, + "k" : 10, + "run_count" : 2 + }, + + "index" : [ + { + "name" : "faiss_ivf_pq.M48-nlist50K", + "algo" : "faiss_gpu_ivf_pq", + "build_param": {"nlist":50000, "M":48}, + "file" : "index/deep-1B/faiss_ivf_pq/M48-nlist50K", + "search_params" : [ + {"nprobe":1}, + {"nprobe":5}, + {"nprobe":10}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000}, + {"nprobe":2000} + ], + "search_result_file" : "result/deep-1B/faiss_ivf_pq/M48-nlist50K" + }, + + + ] +} diff --git a/python/raft-ann-bench/raft-ann-bench/run/conf/glove-100-inner.json b/python/raft-ann-bench/raft-ann-bench/run/conf/glove-100-inner.json new file mode 100644 index 0000000000..5d0bbf970c --- /dev/null +++ b/python/raft-ann-bench/raft-ann-bench/run/conf/glove-100-inner.json @@ -0,0 +1,793 @@ +{ + "dataset" : { + "name" : "glove-100-inner", + "base_file" : "data/glove-100-inner/base.fbin", + "query_file" : "data/glove-100-inner/query.fbin", + "distance" : "inner_product" + }, + + "search_basic_param" : { + "batch_size" : 1, + "k" : 10, + "run_count" : 3 + }, + + "index" : [ + { + "name" : "hnswlib.M4", + "algo" : "hnswlib", + "build_param": {"M":4, "efConstruction":500, "numThreads":4}, + "file" : "index/glove-100-inner/hnswlib/M4", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/glove-100-inner/hnswlib/M4" + }, + + { + "name" : "hnswlib.M8", + "algo" : "hnswlib", + "build_param": {"M":8, "efConstruction":500, "numThreads":4}, + "file" : "index/glove-100-inner/hnswlib/M8", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/glove-100-inner/hnswlib/M8" + }, + + { + "name" : "hnswlib.M12", + "algo" : "hnswlib", + "build_param": {"M":12, "efConstruction":500, "numThreads":4}, + "file" : "index/glove-100-inner/hnswlib/M12", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/glove-100-inner/hnswlib/M12" + }, + + { + "name" : "hnswlib.M16", + "algo" : "hnswlib", + "build_param": {"M":16, "efConstruction":500, "numThreads":4}, + "file" : "index/glove-100-inner/hnswlib/M16", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":80, "numThreads":1}, + 
{"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/glove-100-inner/hnswlib/M16" + }, + + { + "name" : "hnswlib.M24", + "algo" : "hnswlib", + "build_param": {"M":24, "efConstruction":500, "numThreads":4}, + "file" : "index/glove-100-inner/hnswlib/M24", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/glove-100-inner/hnswlib/M24" + }, + + { + "name" : "hnswlib.M36", + "algo" : "hnswlib", + "build_param": {"M":36, "efConstruction":500, "numThreads":4}, + "file" : "index/glove-100-inner/hnswlib/M36", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/glove-100-inner/hnswlib/M36" + }, + + { + "name" : "hnswlib.M48", + "algo" : "hnswlib", + "build_param": {"M":48, "efConstruction":500, "numThreads":4}, + "file" : "index/glove-100-inner/hnswlib/M48", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/glove-100-inner/hnswlib/M48" + }, + + { + "name" : "hnswlib.M64", + "algo" : "hnswlib", + "build_param": {"M":64, "efConstruction":500, "numThreads":4}, + "file" : "index/glove-100-inner/hnswlib/M64", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/glove-100-inner/hnswlib/M64" + }, + + { + "name" : "hnswlib.M96", + "algo" : "hnswlib", + "build_param": {"M":96, "efConstruction":500, "numThreads":4}, + "file" : "index/glove-100-inner/hnswlib/M96", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/glove-100-inner/hnswlib/M96" + }, + + { + "name" : "faiss_ivf_flat.nlist1024", + "algo" : "faiss_gpu_ivf_flat", + "build_param": {"nlist":1024}, + "file" : "index/glove-100-inner/faiss_ivf_flat/nlist1024", + "search_params" : [ + {"nprobe":1}, + {"nprobe":5}, + {"nprobe":10}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ], + "search_result_file" : "result/glove-100-inner/faiss_ivf_flat/nlist1024" + }, + + { + "name" : "faiss_ivf_flat.nlist2048", + "algo" : "faiss_gpu_ivf_flat", + "build_param": {"nlist":2048}, + "file" : "index/glove-100-inner/faiss_ivf_flat/nlist2048", + "search_params" : [ + {"nprobe":1}, + {"nprobe":5}, + {"nprobe":10}, + {"nprobe":50}, + {"nprobe":100}, + 
{"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ], + "search_result_file" : "result/glove-100-inner/faiss_ivf_flat/nlist2048" + }, + + { + "name" : "faiss_ivf_flat.nlist4096", + "algo" : "faiss_gpu_ivf_flat", + "build_param": {"nlist":4096}, + "file" : "index/glove-100-inner/faiss_ivf_flat/nlist4096", + "search_params" : [ + {"nprobe":1}, + {"nprobe":5}, + {"nprobe":10}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ], + "search_result_file" : "result/glove-100-inner/faiss_ivf_flat/nlist4096" + }, + + { + "name" : "faiss_ivf_flat.nlist8192", + "algo" : "faiss_gpu_ivf_flat", + "build_param": {"nlist":8192}, + "file" : "index/glove-100-inner/faiss_ivf_flat/nlist8192", + "search_params" : [ + {"nprobe":1}, + {"nprobe":5}, + {"nprobe":10}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ], + "search_result_file" : "result/glove-100-inner/faiss_ivf_flat/nlist8192" + }, + + { + "name" : "faiss_ivf_flat.nlist16384", + "algo" : "faiss_gpu_ivf_flat", + "build_param": {"nlist":16384}, + "file" : "index/glove-100-inner/faiss_ivf_flat/nlist16384", + "search_params" : [ + {"nprobe":1}, + {"nprobe":5}, + {"nprobe":10}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000}, + {"nprobe":2000} + ], + "search_result_file" : "result/glove-100-inner/faiss_ivf_flat/nlist16384" + }, + + + + { + "name" : "faiss_ivf_pq.M2-nlist1024", + "algo" : "faiss_gpu_ivf_pq", + "build_param": {"nlist":1024, "M":2}, + "file" : "index/glove-100-inner/faiss_ivf_pq/M2-nlist1024", + "search_params" : [ + {"nprobe":1}, + {"nprobe":5}, + {"nprobe":10}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ], + "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M2-nlist1024" + }, + + { + "name" : "faiss_ivf_pq.M2-nlist2048", + "algo" : "faiss_gpu_ivf_pq", + "build_param": {"nlist":2048, "M":2}, + "file" : "index/glove-100-inner/faiss_ivf_pq/M2-nlist2048", + "search_params" : [ + {"nprobe":1}, + {"nprobe":5}, + {"nprobe":10}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ], + "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M2-nlist2048" + }, + + { + "name" : "faiss_ivf_pq.M2-nlist4096", + "algo" : "faiss_gpu_ivf_pq", + "build_param": {"nlist":4096, "M":2}, + "file" : "index/glove-100-inner/faiss_ivf_pq/M2-nlist4096", + "search_params" : [ + {"nprobe":1}, + {"nprobe":5}, + {"nprobe":10}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ], + "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M2-nlist4096" + }, + + { + "name" : "faiss_ivf_pq.M2-nlist8192", + "algo" : "faiss_gpu_ivf_pq", + "build_param": {"nlist":8192, "M":2}, + "file" : "index/glove-100-inner/faiss_ivf_pq/M2-nlist8192", + "search_params" : [ + {"nprobe":1}, + {"nprobe":5}, + {"nprobe":10}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ], + "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M2-nlist8192" + }, + + { + "name" : "faiss_ivf_pq.M2-nlist16384", + "algo" : "faiss_gpu_ivf_pq", + "build_param": {"nlist":16384, "M":2}, + "file" : "index/glove-100-inner/faiss_ivf_pq/M2-nlist16384", + "search_params" : [ + {"nprobe":1}, + {"nprobe":5}, + {"nprobe":10}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000}, + {"nprobe":2000} + ], + "search_result_file" : 
"result/glove-100-inner/faiss_ivf_pq/M2-nlist16384" + }, + + { + "name" : "faiss_ivf_pq.M4-nlist1024", + "algo" : "faiss_gpu_ivf_pq", + "build_param": {"nlist":1024, "M":4}, + "file" : "index/glove-100-inner/faiss_ivf_pq/M4-nlist1024", + "search_params" : [ + {"nprobe":1}, + {"nprobe":5}, + {"nprobe":10}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ], + "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M4-nlist1024" + }, + + { + "name" : "faiss_ivf_pq.M4-nlist2048", + "algo" : "faiss_gpu_ivf_pq", + "build_param": {"nlist":2048, "M":4}, + "file" : "index/glove-100-inner/faiss_ivf_pq/M4-nlist2048", + "search_params" : [ + {"nprobe":1}, + {"nprobe":5}, + {"nprobe":10}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ], + "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M4-nlist2048" + }, + + { + "name" : "faiss_ivf_pq.M4-nlist4096", + "algo" : "faiss_gpu_ivf_pq", + "build_param": {"nlist":4096, "M":4}, + "file" : "index/glove-100-inner/faiss_ivf_pq/M4-nlist4096", + "search_params" : [ + {"nprobe":1}, + {"nprobe":5}, + {"nprobe":10}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ], + "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M4-nlist4096" + }, + + { + "name" : "faiss_ivf_pq.M4-nlist8192", + "algo" : "faiss_gpu_ivf_pq", + "build_param": {"nlist":8192, "M":4}, + "file" : "index/glove-100-inner/faiss_ivf_pq/M4-nlist8192", + "search_params" : [ + {"nprobe":1}, + {"nprobe":5}, + {"nprobe":10}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ], + "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M4-nlist8192" + }, + + { + "name" : "faiss_ivf_pq.M4-nlist16384", + "algo" : "faiss_gpu_ivf_pq", + "build_param": {"nlist":16384, "M":4}, + "file" : "index/glove-100-inner/faiss_ivf_pq/M4-nlist16384", + "search_params" : [ + {"nprobe":1}, + {"nprobe":5}, + {"nprobe":10}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000}, + {"nprobe":2000} + ], + "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M4-nlist16384" + }, + + { + "name" : "faiss_ivf_pq.M20-nlist1024", + "algo" : "faiss_gpu_ivf_pq", + "build_param": {"nlist":1024, "M":20}, + "file" : "index/glove-100-inner/faiss_ivf_pq/M20-nlist1024", + "search_params" : [ + {"nprobe":1}, + {"nprobe":5}, + {"nprobe":10}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ], + "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M20-nlist1024" + }, + + { + "name" : "faiss_ivf_pq.M20-nlist2048", + "algo" : "faiss_gpu_ivf_pq", + "build_param": {"nlist":2048, "M":20}, + "file" : "index/glove-100-inner/faiss_ivf_pq/M20-nlist2048", + "search_params" : [ + {"nprobe":1}, + {"nprobe":5}, + {"nprobe":10}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ], + "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M20-nlist2048" + }, + + { + "name" : "faiss_ivf_pq.M20-nlist4096", + "algo" : "faiss_gpu_ivf_pq", + "build_param": {"nlist":4096, "M":20}, + "file" : "index/glove-100-inner/faiss_ivf_pq/M20-nlist4096", + "search_params" : [ + {"nprobe":1}, + {"nprobe":5}, + {"nprobe":10}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ], + "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M20-nlist4096" + }, + + { + "name" : "faiss_ivf_pq.M20-nlist8192", + "algo" : 
"faiss_gpu_ivf_pq", + "build_param": {"nlist":8192, "M":20}, + "file" : "index/glove-100-inner/faiss_ivf_pq/M20-nlist8192", + "search_params" : [ + {"nprobe":1}, + {"nprobe":5}, + {"nprobe":10}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ], + "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M20-nlist8192" + }, + + { + "name" : "faiss_ivf_pq.M20-nlist16384", + "algo" : "faiss_gpu_ivf_pq", + "build_param": {"nlist":16384, "M":20}, + "file" : "index/glove-100-inner/faiss_ivf_pq/M20-nlist16384", + "search_params" : [ + {"nprobe":1}, + {"nprobe":5}, + {"nprobe":10}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000}, + {"nprobe":2000} + ], + "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M20-nlist16384" + }, + + + { + "name" : "faiss_ivf_sq.nlist1024-fp16", + "algo" : "faiss_gpu_ivf_sq", + "build_param": {"nlist":1024, "quantizer_type":"fp16"}, + "file" : "index/glove-100-inner/faiss_ivf_sq/nlist1024-fp16", + "search_params" : [ + {"nprobe":1}, + {"nprobe":5}, + {"nprobe":10}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ], + "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist1024-fp16" + }, + + { + "name" : "faiss_ivf_sq.nlist2048-fp16", + "algo" : "faiss_gpu_ivf_sq", + "build_param": {"nlist":2048, "quantizer_type":"fp16"}, + "file" : "index/glove-100-inner/faiss_ivf_sq/nlist2048-fp16", + "search_params" : [ + {"nprobe":1}, + {"nprobe":5}, + {"nprobe":10}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ], + "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist2048-fp16" + }, + + { + "name" : "faiss_ivf_sq.nlist4096-fp16", + "algo" : "faiss_gpu_ivf_sq", + "build_param": {"nlist":4096, "quantizer_type":"fp16"}, + "file" : "index/glove-100-inner/faiss_ivf_sq/nlist4096-fp16", + "search_params" : [ + {"nprobe":1}, + {"nprobe":5}, + {"nprobe":10}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ], + "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist4096-fp16" + }, + + { + "name" : "faiss_ivf_sq.nlist8192-fp16", + "algo" : "faiss_gpu_ivf_sq", + "build_param": {"nlist":8192, "quantizer_type":"fp16"}, + "file" : "index/glove-100-inner/faiss_ivf_sq/nlist8192-fp16", + "search_params" : [ + {"nprobe":1}, + {"nprobe":5}, + {"nprobe":10}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ], + "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist8192-fp16" + }, + + { + "name" : "faiss_ivf_sq.nlist16384-fp16", + "algo" : "faiss_gpu_ivf_sq", + "build_param": {"nlist":16384, "quantizer_type":"fp16"}, + "file" : "index/glove-100-inner/faiss_ivf_sq/nlist16384-fp16", + "search_params" : [ + {"nprobe":1}, + {"nprobe":5}, + {"nprobe":10}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000}, + {"nprobe":2000} + ], + "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist16384-fp16" + }, + + + { + "name" : "faiss_ivf_sq.nlist1024-int8", + "algo" : "faiss_gpu_ivf_sq", + "build_param": {"nlist":1024, "quantizer_type":"int8"}, + "file" : "index/glove-100-inner/faiss_ivf_sq/nlist1024-int8", + "search_params" : [ + {"nprobe":1}, + {"nprobe":5}, + {"nprobe":10}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ], + "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist1024-int8" + }, + + { + 
"name" : "faiss_ivf_sq.nlist2048-int8", + "algo" : "faiss_gpu_ivf_sq", + "build_param": {"nlist":2048, "quantizer_type":"int8"}, + "file" : "index/glove-100-inner/faiss_ivf_sq/nlist2048-int8", + "search_params" : [ + {"nprobe":1}, + {"nprobe":5}, + {"nprobe":10}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ], + "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist2048-int8" + }, + + { + "name" : "faiss_ivf_sq.nlist4096-int8", + "algo" : "faiss_gpu_ivf_sq", + "build_param": {"nlist":4096, "quantizer_type":"int8"}, + "file" : "index/glove-100-inner/faiss_ivf_sq/nlist4096-int8", + "search_params" : [ + {"nprobe":1}, + {"nprobe":5}, + {"nprobe":10}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ], + "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist4096-int8" + }, + + { + "name" : "faiss_ivf_sq.nlist8192-int8", + "algo" : "faiss_gpu_ivf_sq", + "build_param": {"nlist":8192, "quantizer_type":"int8"}, + "file" : "index/glove-100-inner/faiss_ivf_sq/nlist8192-int8", + "search_params" : [ + {"nprobe":1}, + {"nprobe":5}, + {"nprobe":10}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ], + "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist8192-int8" + }, + + { + "name" : "faiss_ivf_sq.nlist16384-int8", + "algo" : "faiss_gpu_ivf_sq", + "build_param": {"nlist":16384, "quantizer_type":"int8"}, + "file" : "index/glove-100-inner/faiss_ivf_sq/nlist16384-int8", + "search_params" : [ + {"nprobe":1}, + {"nprobe":5}, + {"nprobe":10}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000}, + {"nprobe":2000} + ], + "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist16384-int8" + }, + + { + "name" : "faiss_flat", + "algo" : "faiss_gpu_flat", + "build_param": {}, + "file" : "index/glove-100-inner/faiss_flat/flat", + "search_params" : [{}], + "search_result_file" : "result/glove-100-inner/faiss_flat/flat" + }, + + { + "name" : "ggnn.kbuild96-segment64-refine2-k10", + "algo" : "ggnn", + "build_param": { + "k_build": 96, + "segment_size": 64, + "refine_iterations": 2, + "dataset_size": 1183514, + "k": 10 + }, + "file" : "index/glove-100-inner/ggnn/kbuild96-segment64-refine2-k10", + "search_params" : [ + {"tau":0.001, "block_dim":64, "sorted_size":32}, + {"tau":0.005, "block_dim":64, "sorted_size":32}, + {"tau":0.01, "block_dim":64, "sorted_size":32}, + {"tau":0.02, "block_dim":64, "sorted_size":32}, + {"tau":0.03, "block_dim":64, "sorted_size":32}, + {"tau":0.04, "block_dim":64, "sorted_size":32}, + {"tau":0.05, "block_dim":64, "sorted_size":32}, + {"tau":0.06, "block_dim":64, "sorted_size":32}, + {"tau":0.09, "block_dim":64, "sorted_size":32}, + {"tau":0.12, "block_dim":64, "sorted_size":32}, + {"tau":0.18, "block_dim":64, "sorted_size":32}, + {"tau":0.21, "block_dim":64, "sorted_size":32}, + {"tau":0.24, "block_dim":64, "sorted_size":32}, + {"tau":0.27, "block_dim":64, "sorted_size":32}, + {"tau":0.3, "block_dim":64, "sorted_size":32}, + {"tau":0.4, "block_dim":64, "sorted_size":32}, + {"tau":0.01, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, + {"tau":0.02, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, + {"tau":0.03, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, + {"tau":0.04, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, + {"tau":0.05, "block_dim":128, 
"max_iterations":2000, "cache_size":1024, "sorted_size":32}, + {"tau":0.06, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, + {"tau":0.09, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, + {"tau":0.12, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, + {"tau":0.18, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, + {"tau":0.21, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, + {"tau":0.24, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, + {"tau":0.27, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, + {"tau":0.3, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, + {"tau":0.4, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, + {"tau":0.5, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32} + + ], + "search_result_file" : "result/glove-100-inner/ggnn/kbuild96-segment64-refine2-k10" + }] +} diff --git a/python/raft-ann-bench/raft-ann-bench/run/conf/sift-128-euclidean.json b/python/raft-ann-bench/raft-ann-bench/run/conf/sift-128-euclidean.json new file mode 100644 index 0000000000..c4b8905b1d --- /dev/null +++ b/python/raft-ann-bench/raft-ann-bench/run/conf/sift-128-euclidean.json @@ -0,0 +1,1351 @@ +{ + "dataset": { + "name": "sift-128-euclidean", + "base_file": "/home/cjnolet/workspace/ann_data/sift-128-euclidean/base.fbin", + "query_file": "/home/cjnolet/workspace/ann_data/sift-128-euclidean/query.fbin", + "distance": "euclidean" + }, + "search_basic_param": { + "batch_size": 5000, + "k": 10, + "run_count": 3 + }, + "index": [ + { + "name" : "hnswlib.M12", + "algo" : "hnswlib", + "build_param": {"M":12, "efConstruction":500, "numThreads":32}, + "file" : "index/sift-128-euclidean/hnswlib/M12", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/sift-128-euclidean/hnswlib/M12" + }, + { + "name" : "hnswlib.M16", + "algo" : "hnswlib", + "build_param": {"M":16, "efConstruction":500, "numThreads":32}, + "file" : "index/sift-128-euclidean/hnswlib/M16", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/sift-128-euclidean/hnswlib/M16" + }, + { + "name" : "hnswlib.M24", + "algo" : "hnswlib", + "build_param": {"M":24, "efConstruction":500, "numThreads":32}, + "file" : "index/sift-128-euclidean/hnswlib/M24", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/sift-128-euclidean/hnswlib/M24" + }, + { + "name" : "hnswlib.M36", + "algo" : "hnswlib", + "build_param": {"M":36, "efConstruction":500, "numThreads":32}, + "file" : 
"index/sift-128-euclidean/hnswlib/M36", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/sift-128-euclidean/hnswlib/M36" + }, + + + + + { + "name": "raft_bfknn", + "algo": "raft_bfknn", + "build_param": {}, + "file": "index/sift-128-euclidean/raft_bfknn/bfknn", + "search_params": [ + { + "probe": 1 + } + ], + "search_result_file": "result/sift-128-euclidean/raft_bfknn/bfknn" + }, + { + "name": "faiss_ivf_flat.nlist1024", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 1024 + }, + "file": "index/sift-128-euclidean/faiss_ivf_flat/nlist1024", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/sift-128-euclidean/faiss_ivf_flat/nlist1024" + }, + { + "name": "faiss_ivf_flat.nlist2048", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 2048 + }, + "file": "index/sift-128-euclidean/faiss_ivf_flat/nlist2048", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/sift-128-euclidean/faiss_ivf_flat/nlist2048" + }, + { + "name": "faiss_ivf_flat.nlist4096", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 4096 + }, + "file": "index/sift-128-euclidean/faiss_ivf_flat/nlist4096", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/sift-128-euclidean/faiss_ivf_flat/nlist4096" + }, + { + "name": "faiss_ivf_flat.nlist8192", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 8192 + }, + "file": "index/sift-128-euclidean/faiss_ivf_flat/nlist8192", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/sift-128-euclidean/faiss_ivf_flat/nlist8192" + }, + { + "name": "faiss_ivf_flat.nlist16384", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 16384 + }, + "file": "index/sift-128-euclidean/faiss_ivf_flat/nlist16384", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/sift-128-euclidean/faiss_ivf_flat/nlist16384" + }, + { + "name": "faiss_ivf_pq.M64-nlist1024", + "algo": "faiss_gpu_ivf_pq", + "build_param": { + "nlist": 1024, + "M": 64, + "useFloat16": true, + "usePrecomputed": true + }, + "file": "index/sift-128-euclidean/faiss_ivf_pq/M64-nlist1024", + "search_params": [ + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": 
"result/sift-128-euclidean/faiss_ivf_pq/M64-nlist1024" + }, + { + "name": "faiss_ivf_pq.M64-nlist1024.noprecomp", + "algo": "faiss_gpu_ivf_pq", + "build_param": { + "nlist": 1024, + "M": 64, + "useFloat16": true, + "usePrecomputed": false + }, + "file": "index/sift-128-euclidean/faiss_ivf_pq/M64-nlist1024.noprecomp", + "search_params": [ + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/sift-128-euclidean/faiss_ivf_pq/M64-nlist1024" + }, + { + "name": "faiss_ivf_sq.nlist1024-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 1024, + "quantizer_type": "fp16" + }, + "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist1024-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist1024-fp16" + }, + { + "name": "faiss_ivf_sq.nlist2048-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 2048, + "quantizer_type": "fp16" + }, + "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist2048-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist2048-fp16" + }, + { + "name": "faiss_ivf_sq.nlist4096-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 4096, + "quantizer_type": "fp16" + }, + "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist4096-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist4096-fp16" + }, + { + "name": "faiss_ivf_sq.nlist8192-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 8192, + "quantizer_type": "fp16" + }, + "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist8192-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist8192-fp16" + }, + { + "name": "faiss_ivf_sq.nlist16384-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 16384, + "quantizer_type": "fp16" + }, + "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist16384-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist16384-fp16" + }, + { + "name": "faiss_ivf_sq.nlist1024-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 1024, + "quantizer_type": "int8" + }, + "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist1024-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { 
+ "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist1024-int8" + }, + { + "name": "faiss_ivf_sq.nlist2048-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 2048, + "quantizer_type": "int8" + }, + "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist2048-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist2048-int8" + }, + { + "name": "faiss_ivf_sq.nlist4096-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 4096, + "quantizer_type": "int8" + }, + "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist4096-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist4096-int8" + }, + { + "name": "faiss_ivf_sq.nlist8192-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 8192, + "quantizer_type": "int8" + }, + "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist8192-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist8192-int8" + }, + { + "name": "faiss_ivf_sq.nlist16384-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 16384, + "quantizer_type": "int8" + }, + "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist16384-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist16384-int8" + }, + { + "name": "faiss_flat", + "algo": "faiss_gpu_flat", + "build_param": {}, + "file": "index/sift-128-euclidean/faiss_flat/flat", + "search_params": [ + {} + ], + "search_result_file": "result/sift-128-euclidean/faiss_flat/flat" + }, + + { + "name": "raft_ivf_pq.dimpq128-cluster1024", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + 
"ratio": 1, + "niter": 25 + }, + "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-float", + "search_params": [ + { + "k": 10, + "numProbes": 1, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 5, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-float" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-half", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-half" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 64, + "ratio": 1, + "niter": 25 + }, + "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + 
"smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 64, + "ratio": 1, + "niter": 25 + }, + "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-half", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-half" + }, + { + "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 32, + "ratio": 1, + "niter": 25 + }, + "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq32-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq32-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 16, + "ratio": 1, + "niter": 25 + }, + "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq16-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq16-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", + "algo": "raft_ivf_pq", + 
"build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024-half-float", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024-half-float" + }, + { + "name": "raft_ivf_pq.dimpq512-cluster1024-float-float", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 512, + "ratio": 1, + "niter": 25 + }, + "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq512-cluster1024-float-float", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq512-cluster1024-float-float" + }, + { + "name": "raft_ivf_flat.nlist1024", + "algo": "raft_ivf_flat", + "build_param": { + "nlist": 1024, + "ratio": 1, + "niter": 25 + }, + "file": "index/sift-128-euclidean/raft_ivf_flat/nlist1024", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/sift-128-euclidean/raft_ivf_flat/nlist1024" + }, + { + "name": "raft_ivf_flat.nlist16384", + "algo": "raft_ivf_flat", + "build_param": { + "nlist": 16384, + "ratio": 2, + "niter": 20 + }, + "file": "index/sift-128-euclidean/raft_ivf_flat/nlist16384", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/sift-128-euclidean/raft_ivf_flat/nlist16384" + }, + + { + "name" : "raft_cagra.dim32", + "algo" : "raft_cagra", + "build_param": { + "index_dim" : 32 + }, + "file" : "index/sift-128-euclidean/raft_cagra/dim32", + "search_params" : [ + {"itopk": 32}, + {"itopk": 64}, + {"itopk": 128} + ], + "search_result_file" : "result/sift-128-euclidean/raft_cagra/dim32" + }, + + { + "name" : "raft_cagra.dim64", + "algo" : "raft_cagra", + "build_param": { + "index_dim" : 64 + }, + "file" : "index/sift-128-euclidean/raft_cagra/dim64", + "search_params" : [ + {"itopk": 32}, + {"itopk": 64}, + {"itopk": 128} + ], + "search_result_file" : "result/sift-128-euclidean/raft_cagra/dim64" + } + ] +} 
diff --git a/python/raft-ann-bench/raft-ann-bench/split_groundtruth/__main__.py b/python/raft-ann-bench/raft-ann-bench/split_groundtruth/__main__.py new file mode 100644 index 0000000000..cd67d9c8b8 --- /dev/null +++ b/python/raft-ann-bench/raft-ann-bench/split_groundtruth/__main__.py @@ -0,0 +1,47 @@ +# +# Copyright (c) 2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import os +import subprocess + + +def split_groundtruth(groundtruth_filepath): + ann_bench_scripts_dir = os.path.join(os.getenv("RAFT_HOME"), + "cpp/bench/ann/scripts") + ann_bench_scripts_path = os.path.join(ann_bench_scripts_dir, + "split_groundtruth.pl") + pwd = os.getcwd() + os.chdir("/".join(groundtruth_filepath.split("/")[:-1])) + groundtruth_filename = groundtruth_filepath.split("/")[-1] + p = subprocess.Popen([ann_bench_scripts_path, groundtruth_filename, + "groundtruth"]) + p.wait() + os.chdir(pwd) + + +def main(): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument("--groundtruth", + help="Path to billion-scale dataset groundtruth file", + required=True) + args = parser.parse_args() + + split_groundtruth(args.groundtruth) + + +if __name__ == "__main__": + main() diff --git a/cpp/bench/ann/scripts/split_groundtruth.pl b/python/raft-ann-bench/raft-ann-bench/split_groundtruth/split_groundtruth.pl similarity index 100% rename from cpp/bench/ann/scripts/split_groundtruth.pl rename to python/raft-ann-bench/raft-ann-bench/split_groundtruth/split_groundtruth.pl diff --git a/python/raft-dask/pyproject.toml b/python/raft-dask/pyproject.toml index bdbcf61e0f..3c81b6f16b 100644 --- a/python/raft-dask/pyproject.toml +++ b/python/raft-dask/pyproject.toml @@ -35,8 +35,8 @@ license = { text = "Apache 2.0" } requires-python = ">=3.9" dependencies = [ "dask-cuda==23.10.*", - "dask>=2023.7.1", - "distributed>=2023.7.1", + "dask==2023.7.1", + "distributed==2023.7.1", "joblib>=0.11", "numba>=0.57", "numpy>=1.21", From e0586de78aa36e7cf5b202a41b7adf7f5c464d0d Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Thu, 24 Aug 2023 22:42:25 -0500 Subject: [PATCH 14/70] FIX fixing straggling changes from rebase --- .../raft-ann-bench/data_export/eval.pl | 430 ----- .../raft-ann-bench/run/conf/bigann-100M.json | 248 ++- .../raft-ann-bench/run/conf/deep-100M.json | 867 ++-------- .../raft-ann-bench/run/conf/deep-1B.json | 36 +- .../run/conf/deep-image-96-angular.json | 1366 +++++++++++++++ .../run/conf/fashion-mnist-784-euclidean.json | 1366 +++++++++++++++ .../run/conf/gist-960-euclidean.json | 1351 +++++++++++++++ .../run/conf/glove-100-angular.json | 1351 +++++++++++++++ .../run/conf/glove-100-inner.json | 512 +++--- .../run/conf/glove-50-angular.json | 1351 +++++++++++++++ .../run/conf/lastfm-65-angular.json | 1351 +++++++++++++++ .../run/conf/mnist-784-euclidean.json | 1351 +++++++++++++++ .../run/conf/nytimes-256-angular.json | 1351 +++++++++++++++ .../run/conf/sift-128-euclidean.json | 1473 ++++------------- 14 files changed, 
11653 insertions(+), 2751 deletions(-) delete mode 100755 python/raft-ann-bench/raft-ann-bench/data_export/eval.pl create mode 100644 python/raft-ann-bench/raft-ann-bench/run/conf/deep-image-96-angular.json create mode 100644 python/raft-ann-bench/raft-ann-bench/run/conf/fashion-mnist-784-euclidean.json create mode 100644 python/raft-ann-bench/raft-ann-bench/run/conf/gist-960-euclidean.json create mode 100644 python/raft-ann-bench/raft-ann-bench/run/conf/glove-100-angular.json create mode 100644 python/raft-ann-bench/raft-ann-bench/run/conf/glove-50-angular.json create mode 100644 python/raft-ann-bench/raft-ann-bench/run/conf/lastfm-65-angular.json create mode 100644 python/raft-ann-bench/raft-ann-bench/run/conf/mnist-784-euclidean.json create mode 100644 python/raft-ann-bench/raft-ann-bench/run/conf/nytimes-256-angular.json diff --git a/python/raft-ann-bench/raft-ann-bench/data_export/eval.pl b/python/raft-ann-bench/raft-ann-bench/data_export/eval.pl deleted file mode 100755 index 81c5563d79..0000000000 --- a/python/raft-ann-bench/raft-ann-bench/data_export/eval.pl +++ /dev/null @@ -1,430 +0,0 @@ -#!/usr/bin/perl - -# ============================================================================= -# Copyright (c) 2020-2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. - -use warnings; -use strict; -use autodie qw(open close); -use File::Find; -use Getopt::Std; - -my $QPS = 'QPS'; -my $AVG_LATENCY = 'avg_latency(ms)'; -my $P99_LATENCY = 'p99_latency(ms)'; -my $P999_LATENCY = 'p999_latency(ms)'; -my @CONDITIONS = ([$QPS, 2000], ['recall', 0.9], ['recall', 0.95]); - - -my $USAGE = << 'END'; -usage: [-f] [-l avg|p99|p999] [-o output.csv] groundtruth.neighbors.ibin result_paths... - result_paths... are paths to the search result files. - Can specify multiple paths. - For each of them, if it's a directory, all the .txt files found under - it recursively will be regarded as inputs. - - -f: force to recompute recall and update it in result file if needed - -l: output search latency rather than QPS. Available options: - "avg" for average latency; - "p99" for 99th percentile latency; - "p999" for 99.9th percentile latency. - -o: also write result to a csv file -END - - -my %opt; -getopts('fl:o:', \%opt) - or die $USAGE; -my $force_calc_recall = exists $opt{f} ? 1 : 0; -my $csv_file; -$csv_file = $opt{o} if exists $opt{o}; -my $metric = $QPS; -if (exists $opt{l}) { - my $option = $opt{l}; - if ($option eq 'avg') { - $metric = $AVG_LATENCY; - } - elsif ($option eq 'p99') { - $metric = $P99_LATENCY; - } - elsif ($option eq 'p999') { - $metric = $P999_LATENCY; - } - else { - die - "[error] illegal value for '-l': '$option'. 
Must be 'avg', 'p99' or 'p999'\n"; - } -} - -@ARGV >= 2 - or die $USAGE; - - -my $truth_file = shift @ARGV; -my ($k, $dataset, $distance, $results) = get_all_results($metric, @ARGV); -if (!defined $k) { - print STDERR "no result file found\n"; - exit -1; -} -print STDERR "dataset = $dataset, distance = $distance, k = $k\n\n"; -calc_missing_recall($results, $truth_file, $force_calc_recall); - -my @results = sort { - $a->{name} cmp $b->{name} - or $a->{recall} <=> $b->{recall} - or $b->{qps} <=> $a->{qps} -} @$results; -printf("%-60s %6s %16s %s\n", '', 'Recall', $metric, 'search_param'); -for my $result (@results) { - my $fmt = ($metric eq $QPS) ? '%16.1f' : '%16.3f'; - my $qps = $result->{qps}; - $qps *= 1000 if $metric ne $QPS; # the unit of latency is ms - printf("%-60s %6.4f ${fmt} %s\n", - $result->{name}, $result->{recall}, $qps, $result->{search_param}); -} -if (defined $csv_file) { - open my $fh, '>', $csv_file; - print {$fh} ",Recall,${metric},search_param\n"; - for my $result (@results) { - my $qps = $result->{qps}; - $qps *= 1000 if $metric ne $QPS; - printf {$fh} ( - "%s,%.4f,%.3f,%s\n", $result->{name}, $result->{recall}, - $qps, $result->{search_param} - ); - } -} -print "\n"; -calc_and_print_estimation($results, $metric, \@CONDITIONS); - - - - -sub read_result { - my ($fname) = @_; - open my $fh, '<', $fname; - my %attr; - while (<$fh>) { - chomp; - next if /^\s*$/; - my $pos = index($_, ':'); - $pos != -1 - or die "[error] no ':' is found: '$_'\n"; - my $key = substr($_, 0, $pos); - my $val = substr($_, $pos + 1); - $key =~ s/^\s+|\s+$//g; - $val =~ s/^\s+|\s+$//g; - - # old version benchmark compatible - if ($key eq 'search_time') { - $key = 'average_search_time'; - $val *= $attr{batch_size}; - } - $attr{$key} = $val; - } - return \%attr; -} - -sub overwrite_recall_to_result { - my ($fname, $recall) = @_; - open my $fh_in, '<', $fname; - $recall = sprintf("%f", $recall); - my $out; - while (<$fh_in>) { - s/^recall: .*/recall: $recall/; - $out .= $_; - } - close $fh_in; - - open my $fh_out, '>', $fname; - print {$fh_out} $out; -} - -sub append_recall_to_result { - my ($fname, $recall) = @_; - open my $fh, '>>', $fname; - printf {$fh} ("recall: %f\n", $recall); -} - -sub get_all_results { - my ($metric) = shift @_; - - my %fname; - my $wanted = sub { - if (-f && /\.txt$/) { - $fname{$File::Find::name} = 1; - } - }; - find($wanted, @_); - - my $k; - my $dataset; - my $distance; - my @results; - for my $f (sort keys %fname) { - print STDERR "reading $f ...\n"; - my $attr = read_result($f); - if (!defined $k) { - $k = $attr->{k}; - $dataset = $attr->{dataset}; - $distance = $attr->{distance}; - } - else { - $attr->{k} eq $k - or die "[error] k should be $k, but is $attr->{k} in $f\n"; - $attr->{dataset} eq $dataset - or die - "[error] dataset should be $dataset, but is $attr->{dataset} in $f\n"; - $attr->{distance} eq $distance - or die - "[error] distance should be $distance, but is $attr->{distance} in $f\n"; - } - - my $batch_size = $attr->{batch_size}; - $batch_size =~ s/000000$/M/; - $batch_size =~ s/000$/K/; - my $search_param = $attr->{search_param}; - $search_param =~ s/^{//; - $search_param =~ s/}$//; - $search_param =~ s/,/ /g; - $search_param =~ s/"//g; - - my $qps; - if ($metric eq $QPS) { - $qps = $attr->{batch_size} / $attr->{average_search_time}; - } - elsif ($metric eq $AVG_LATENCY) { - $qps = $attr->{average_search_time}; - } - elsif ($metric eq $P99_LATENCY) { - exists $attr->{p99_search_time} - or die "[error] p99_search_time is not found\n"; - $qps = 
$attr->{p99_search_time}; - } - elsif ($metric eq $P999_LATENCY) { - exists $attr->{p999_search_time} - or die "[error] p999_search_time is not found\n"; - $qps = $attr->{p999_search_time}; - } - else { - die "[error] unknown latency type: '$metric'\n"; - } - my $result = { - file => $f, - name => "$attr->{name}-batch${batch_size}", - search_param => $search_param, - qps => $qps, - }; - - if (exists $attr->{recall}) { - $result->{recall} = $attr->{recall}; - } - push @results, $result; - } - return $k, $dataset, $distance, \@results; -} - -sub read_ibin { - my ($fname) = @_; - - open my $fh, '<:raw', $fname; - my $raw; - - read($fh, $raw, 8); - my ($nrows, $dim) = unpack('LL', $raw); - - my $expected_size = 8 + $nrows * $dim * 4; - my $size = (stat($fh))[7]; - $size == $expected_size - or die( - "[error] expected size is $expected_size, but actual size is $size\n"); - - read($fh, $raw, $nrows * $dim * 4) == $nrows * $dim * 4 - or die "[error] read $fname failed\n"; - my @data = unpack('l' x ($nrows * $dim), $raw); - return \@data, $nrows, $dim; -} - -sub pick_k_neighbors { - my ($neighbors, $nrows, $ncols, $k) = @_; - - my @res; - for my $i (0 .. $nrows - 1) { - my %neighbor_set; - for my $j (0 .. $k - 1) { - $neighbor_set{$neighbors->[$i * $ncols + $j]} = 1; - } - push @res, \%neighbor_set; - } - return \@res; -} - - -sub calc_recall { - my ($truth_k_neighbors, $result_neighbors, $nrows, $k) = @_; - - my $recall = 0; - for my $i (0 .. $nrows - 1) { - my $tp = 0; - for my $j (0 .. $k - 1) { - my $neighbor = $result_neighbors->[$i * $k + $j]; - ++$tp if exists $truth_k_neighbors->[$i]{$neighbor}; - } - $recall += $tp; - } - return $recall / $k / $nrows; -} - -sub calc_missing_recall { - my ($results, $truth_file, $force_calc_recall) = @_; - - my $need_calc_recall = grep { !exists $_->{recall} } @$results; - return unless $need_calc_recall || $force_calc_recall; - - my ($truth_neighbors, $nrows, $truth_k) = read_ibin($truth_file); - $truth_k >= $k - or die "[error] ground truth k ($truth_k) < k($k)\n"; - my $truth_k_neighbors = - pick_k_neighbors($truth_neighbors, $nrows, $truth_k, $k); - - for my $result (@$results) { - next if exists $result->{recall} && !$force_calc_recall; - - my $result_bin_file = $result->{file}; - $result_bin_file =~ s/txt$/ibin/; - print STDERR "calculating recall for $result_bin_file ...\n"; - my ($result_neighbors, $result_nrows, $result_k) = - read_ibin($result_bin_file); - $result_k == $k - or die - "[error] k should be $k, but is $result_k in $result_bin_file\n"; - $result_nrows == $nrows - or die - "[error] #row should be $nrows, but is $result_nrows in $result_bin_file\n"; - - my $recall = - calc_recall($truth_k_neighbors, $result_neighbors, $nrows, $k); - if (exists $result->{recall}) { - my $new_value = sprintf("%f", $recall); - if ($result->{recall} ne $new_value) { - print "update recall: $result->{recall} -> $new_value\n"; - overwrite_recall_to_result($result->{file}, $recall); - } - } - else { - append_recall_to_result($result->{file}, $recall); - } - $result->{recall} = $recall; - } -} - - -sub estimate { - my ($results, $condition, $value) = @_; - my %point_of; - for my $result (@$results) { - my $point; - if ($condition eq 'recall') { - $point = [$result->{recall}, $result->{qps}]; - } - else { - $point = [$result->{qps}, $result->{recall}]; - } - push @{$point_of{$result->{name}}}, $point; - } - - my @names = sort keys %point_of; - my @result; - for my $name (@names) { - my @points = sort { $a->[0] <=> $b->[0] } @{$point_of{$name}}; - if ($value < 
$points[0][0] || $value > $points[$#points][0]) { - push @result, -1; - next; - } - elsif ($value == $points[0][0]) { - push @result, $points[0][1]; - next; - } - - for my $i (1 .. $#points) { - if ($points[$i][0] >= $value) { - push @result, - linear_interpolation($value, @{$points[$i - 1]}, - @{$points[$i]}); - last; - } - } - } - return \@names, \@result; -} - -sub linear_interpolation { - my ($x, $x1, $y1, $x2, $y2) = @_; - return $y1 + ($x - $x1) * ($y2 - $y1) / ($x2 - $x1); -} - -sub merge { - my ($all, $new, $scale) = @_; - @$all == @$new - or die "[error] length is not equal\n"; - for my $i (0 .. @$all - 1) { - push @{$all->[$i]}, $new->[$i] * $scale; - } -} - -sub calc_and_print_estimation { - my ($results, $metric, $conditions) = @_; - - my @conditions = grep { - my $target = $_->[0]; - if ($target eq 'recall' || $target eq $metric) { - 1; - } - else { - $target eq $QPS - || $target eq $AVG_LATENCY - || $target eq $P99_LATENCY - || $target eq $P999_LATENCY - or die "[error] unknown condition: '$target'\n"; - 0; - } - } @$conditions; - - my @headers = map { - my $header; - if ($_->[0] eq 'recall') { - $header = $metric . '@recall' . $_->[1]; - } - elsif ($_->[0] eq $metric) { - $header = 'recall@' . $metric . $_->[1]; - } - $header; - } @conditions; - - my $scale = ($metric eq $QPS) ? 1 : 1000; - my $estimations; - for my $condition (@conditions) { - my ($names, $estimate) = estimate($results, @$condition); - if (!defined $estimations) { - @$estimations = map { [$_] } @$names; - } - merge($estimations, $estimate, $scale); - } - - my $fmt = "%-60s" . (" %16s" x @headers) . "\n"; - printf($fmt, '', @headers); - $fmt =~ s/16s/16.4f/g; - for (@$estimations) { - printf($fmt, @$_); - } -} diff --git a/python/raft-ann-bench/raft-ann-bench/run/conf/bigann-100M.json b/python/raft-ann-bench/raft-ann-bench/run/conf/bigann-100M.json index 0e59936f0e..c691c68299 100644 --- a/python/raft-ann-bench/raft-ann-bench/run/conf/bigann-100M.json +++ b/python/raft-ann-bench/raft-ann-bench/run/conf/bigann-100M.json @@ -1,79 +1,90 @@ { - "dataset" : { - "name" : "bigann-100M", - "base_file" : "data/bigann-1B/base.1B.u8bin", - "subset_size" : 100000000, - "query_file" : "data/bigann-1B/query.public.10K.u8bin", - "distance" : "euclidean" + "dataset": { + "name": "bigann-100M", + "base_file": "bigann-1B/base.1B.u8bin", + "subset_size": 100000000, + "query_file": "bigann-1B/query.public.10K.u8bin", + "groundtruth_neighbors_file": "bigann-100M/groundtruth.neighbors.ibin", + "distance": "euclidean" }, - "search_basic_param" : { - "batch_size" : 10000, - "k" : 10, - "run_count" : 2 + "search_basic_param": { + "batch_size": 10000, + "k": 10 }, - "index" : [ + "index": [ { - "name": "raft_ivf_pq.dimpq64-cluster5K-float-float", + "name": "raft_ivf_pq.dimpq64-cluster5K", "algo": "raft_ivf_pq", - "build_param": { - "niter": 25, - "nlist": 5000, - "pq_dim": 64, - "ratio": 10 - }, - "file": "index/bigann-100M/raft_ivf_pq/dimpq64-cluster5K", + "build_param": {"niter": 25, "nlist": 5000, "pq_dim": 64, "ratio": 10}, + "file": "bigann-100M/raft_ivf_pq/dimpq64-cluster5K", + "dataset_memtype": "host", "search_params": [ - { - "numProbes": 20, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "numProbes": 30, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "numProbes": 40, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "numProbes": 100, - "internalDistanceDtype": 
"float", - "smemLutDtype": "float" - }, - { - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "numProbes": 1000, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/bigann-100M/raft_ivf_pq/dimpq64-cluster5K-float-float" + { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 1000, "internalDistanceDtype": "half", "smemLutDtype": "half" } + ] }, { - "name" : "hnswlib.M12", - "algo" : "hnswlib", + "name": "raft_ivf_pq.dimpq64-cluster10K", + "algo": "raft_ivf_pq", + "build_param": {"niter": 25, "nlist": 10000, "pq_dim": 64, "ratio": 10}, + "file": "bigann-100M/raft_ivf_pq/dimpq64-cluster5K", + "search_params": [ + { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 100, "internalDistanceDtype": 
"float", "smemLutDtype": "fp8" }, + { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 1000, "internalDistanceDtype": "half", "smemLutDtype": "half" } + ] + }, + { + "name": "hnswlib.M12", + "algo": "hnswlib", "build_param": {"M":12, "efConstruction":500, "numThreads":32}, - "file" : "index/bigann-100M/hnswlib/M12", - "search_params" : [ + "file": "bigann-100M/hnswlib/M12", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -84,15 +95,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/bigann-100M/hnswlib/M12" + ] }, { - "name" : "hnswlib.M16", - "algo" : "hnswlib", + "name": "hnswlib.M16", + "algo": "hnswlib", "build_param": {"M":16, "efConstruction":500, "numThreads":32}, - "file" : "index/bigann-100M/hnswlib/M16", - "search_params" : [ + "file": "bigann-100M/hnswlib/M16", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -103,15 +113,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/bigann-100M/hnswlib/M16" + ] }, { - "name" : "hnswlib.M24", - "algo" : "hnswlib", + "name": "hnswlib.M24", + "algo": "hnswlib", "build_param": {"M":24, "efConstruction":500, "numThreads":32}, - "file" : "index/bigann-100M/hnswlib/M24", - "search_params" : [ + "file": "bigann-100M/hnswlib/M24", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -122,15 +131,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/bigann-100M/hnswlib/M24" + ] }, { - "name" : "hnswlib.M36", - "algo" : "hnswlib", + "name": "hnswlib.M36", + "algo": "hnswlib", "build_param": {"M":36, "efConstruction":500, "numThreads":32}, - "file" : "index/bigann-100M/hnswlib/M36", - "search_params" : [ + "file": "bigann-100M/hnswlib/M36", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -141,62 +149,48 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/bigann-100M/hnswlib/M36" + ] }, - - { - "name" : "raft_ivf_flat.nlist100K", - "algo" : "raft_ivf_flat", - "build_param": { - "nlist" : 100000, - "niter" : 25, - "ratio" : 5 - }, - "file" : "index/bigann-100M/raft_ivf_flat/nlist100K", - "search_params" : [ - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ], - "search_result_file" : "result/bigann-100M/raft_ivf_flat/nlist100K" + "name": "raft_ivf_flat.nlist100K", + "algo": "raft_ivf_flat", + "build_param": {"nlist": 100000, 
"niter": 25, "ratio": 5}, + "dataset_memtype":"host", + "file": "bigann-100M/raft_ivf_flat/nlist100K", + "search_params": [ + {"max_batch":10000, "max_k":10, "nprobe":20}, + {"max_batch":10000, "max_k":10, "nprobe":30}, + {"max_batch":10000, "max_k":10, "nprobe":40}, + {"max_batch":10000, "max_k":10, "nprobe":50}, + {"max_batch":10000, "max_k":10, "nprobe":100}, + {"max_batch":10000, "max_k":10, "nprobe":200}, + {"max_batch":10000, "max_k":10, "nprobe":500}, + {"max_batch":10000, "max_k":10, "nprobe":1000} + ] }, - { - "name" : "raft_cagra.dim32", - "algo" : "raft_cagra", - "build_param": { - "index_dim" : 32 - }, - "file" : "index/bigann-100M/raft_cagra/dim32", - "search_params" : [ + "name": "raft_cagra.dim32", + "algo": "raft_cagra", + "dataset_memtype": "host", + "build_param": {"index_dim": 32}, + "file": "bigann-100M/raft_cagra/dim32", + "search_params": [ {"itopk": 32}, {"itopk": 64}, {"itopk": 128} - ], - "search_result_file" : "result/bigann-100M/raft_cagra/dim32" + ] }, - - { - "name" : "raft_cagra.dim64", - "algo" : "raft_cagra", - "build_param": { - "index_dim" : 64 - }, - "file" : "index/bigann-100M/raft_cagra/dim64", - "search_params" : [ + "name": "raft_cagra.dim64", + "algo": "raft_cagra", + "dataset_memtype":"host", + "build_param": {"index_dim": 64}, + "file": "bigann-100M/raft_cagra/dim64", + "search_params": [ {"itopk": 32}, {"itopk": 64}, {"itopk": 128} - ], - "search_result_file" : "result/bigann-100M/raft_cagra/dim64" + ] } ] } diff --git a/python/raft-ann-bench/raft-ann-bench/run/conf/deep-100M.json b/python/raft-ann-bench/raft-ann-bench/run/conf/deep-100M.json index 2f2ec92489..6591957961 100644 --- a/python/raft-ann-bench/raft-ann-bench/run/conf/deep-100M.json +++ b/python/raft-ann-bench/raft-ann-bench/run/conf/deep-100M.json @@ -1,25 +1,25 @@ { - "dataset" : { - "name" : "deep-100M", - "base_file" : "data/deep-1B/base.1B.fbin", - "subset_size" : 100000000, - "query_file" : "data/deep-1B/query.public.10K.fbin", - "distance" : "euclidean" + "dataset": { + "name": "deep-100M", + "base_file": "data/deep-1B/base.1B.fbin", + "subset_size": 100000000, + "query_file": "data/deep-1B/query.public.10K.fbin", + "groundtruth_neighbors_file": "deep-100M/groundtruth.neighbors.ibin", + "distance": "euclidean" }, - "search_basic_param" : { - "batch_size" : 10000, - "k" : 10, - "run_count" : 2 + "search_basic_param": { + "batch_size": 10000, + "k": 10 }, - "index" : [ + "index": [ { - "name" : "hnswlib.M12", - "algo" : "hnswlib", + "name": "hnswlib.M12", + "algo": "hnswlib", "build_param": {"M":12, "efConstruction":500, "numThreads":32}, - "file" : "index/deep-100M/hnswlib/M12", - "search_params" : [ + "file": "deep-100M/hnswlib/M12", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -30,15 +30,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/deep-100M/hnswlib/M12" + ] }, { - "name" : "hnswlib.M16", - "algo" : "hnswlib", + "name": "hnswlib.M16", + "algo": "hnswlib", "build_param": {"M":16, "efConstruction":500, "numThreads":32}, - "file" : "index/deep-100M/hnswlib/M16", - "search_params" : [ + "file": "deep-100M/hnswlib/M16", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -49,15 +48,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/deep-100M/hnswlib/M16" + ] }, { - "name" : "hnswlib.M24", - "algo" : "hnswlib", + 
"name": "hnswlib.M24", + "algo": "hnswlib", "build_param": {"M":24, "efConstruction":500, "numThreads":32}, - "file" : "index/deep-100M/hnswlib/M24", - "search_params" : [ + "file": "deep-100M/hnswlib/M24", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -68,15 +66,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/deep-100M/hnswlib/M24" + ] }, { - "name" : "hnswlib.M36", - "algo" : "hnswlib", + "name": "hnswlib.M36", + "algo": "hnswlib", "build_param": {"M":36, "efConstruction":500, "numThreads":32}, - "file" : "index/deep-100M/hnswlib/M36", - "search_params" : [ + "file": "deep-100M/hnswlib/M36", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -87,15 +84,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/deep-100M/hnswlib/M36" + ] }, { - "name" : "faiss_ivf_flat.nlist50K", - "algo" : "faiss_gpu_ivf_flat", + "name": "faiss_ivf_flat.nlist50K", + "algo": "faiss_gpu_ivf_flat", "build_param": {"nlist":50000}, - "file" : "index/deep-100M/faiss_ivf_flat/nlist50K", - "search_params" : [ + "file": "deep-100M/faiss_ivf_flat/nlist50K", + "search_params": [ {"nprobe":20}, {"nprobe":30}, {"nprobe":40}, @@ -104,15 +100,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/deep-100M/faiss_ivf_flat/nlist50K" + ] }, { - "name" : "faiss_ivf_flat.nlist100K", - "algo" : "faiss_gpu_ivf_flat", + "name": "faiss_ivf_flat.nlist100K", + "algo": "faiss_gpu_ivf_flat", "build_param": {"nlist":100000}, - "file" : "index/deep-100M/faiss_ivf_flat/nlist100K", - "search_params" : [ + "file": "deep-100M/faiss_ivf_flat/nlist100K", + "search_params": [ {"nprobe":20}, {"nprobe":30}, {"nprobe":40}, @@ -121,15 +116,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/deep-100M/faiss_ivf_flat/nlist100K" + ] }, { - "name" : "faiss_ivf_flat.nlist200K", - "algo" : "faiss_gpu_ivf_flat", + "name": "faiss_ivf_flat.nlist200K", + "algo": "faiss_gpu_ivf_flat", "build_param": {"nlist":200000}, - "file" : "index/deep-100M/faiss_ivf_flat/nlist200K", - "search_params" : [ + "file": "deep-100M/faiss_ivf_flat/nlist200K", + "search_params": [ {"nprobe":20}, {"nprobe":30}, {"nprobe":40}, @@ -138,17 +132,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/deep-100M/faiss_ivf_flat/nlist200K" + ] }, - - { - "name" : "faiss_ivf_pq.M48-nlist16K", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M48-nlist16K", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":16384, "M":48}, - "file" : "index/deep-100M/faiss_ivf_pq/M48-nlist16K", - "search_params" : [ + "file": "deep-100M/faiss_ivf_pq/M48-nlist16K", + "search_params": [ {"nprobe":10}, {"nprobe":20}, {"nprobe":30}, @@ -157,15 +148,14 @@ {"nprobe":100}, {"nprobe":200}, {"nprobe":500} - ], - "search_result_file" : "result/deep-100M/faiss_ivf_pq/M48-nlist16K" + ] }, { - "name" : "faiss_ivf_pq.M48-nlist50K", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M48-nlist50K", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":50000, "M":48}, - "file" : "index/deep-100M/faiss_ivf_pq/M48-nlist50K", - "search_params" : [ + "file": "deep-100M/faiss_ivf_pq/M48-nlist50K", + "search_params": [ {"nprobe":20}, {"nprobe":30}, {"nprobe":40}, @@ -174,15 +164,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - 
"search_result_file" : "result/deep-100M/faiss_ivf_pq/M48-nlist50K" + ] }, { - "name" : "faiss_ivf_pq.M48-nlist100K", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M48-nlist100K", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":100000, "M":48}, - "file" : "index/deep-100M/faiss_ivf_pq/M48-nlist100K", - "search_params" : [ + "file": "deep-100M/faiss_ivf_pq/M48-nlist100K", + "search_params": [ {"nprobe":20}, {"nprobe":30}, {"nprobe":40}, @@ -191,659 +180,107 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/deep-100M/faiss_ivf_pq/M48-nlist100K" + ] }, - - { - "name" : "raft_ivf_flat.nlist10K", - "algo" : "raft_ivf_flat", - "build_param": { - "nlist" : 10000, - "niter" : 25, - "ratio" : 5 - }, - "file" : "index/deep-100M/raft_ivf_flat/nlist10K", - "search_params" : [ - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ], - "search_result_file" : "result/deep-100M/raft_ivf_flat/nlist10K" + "name": "raft_ivf_flat.nlist100K", + "algo": "raft_ivf_flat", + "dataset_memtype":"host", + "build_param": {"nlist": 100000, "niter": 25, "ratio": 5}, + "file": "deep-100M/raft_ivf_flat/nlist100K", + "search_params": [ + {"max_batch":10000, "max_k":10, "nprobe":20}, + {"max_batch":10000, "max_k":10, "nprobe":30}, + {"max_batch":10000, "max_k":10, "nprobe":40}, + {"max_batch":10000, "max_k":10, "nprobe":50}, + {"max_batch":10000, "max_k":10, "nprobe":100}, + {"max_batch":10000, "max_k":10, "nprobe":200}, + {"max_batch":10000, "max_k":10, "nprobe":500}, + {"max_batch":10000, "max_k":10, "nprobe":1000} + ] }, { - "name" : "raft_ivf_flat.nlist100K", - "algo" : "raft_ivf_flat", - "build_param": { - "nlist" : 100000, - "niter" : 25, - "ratio" : 5 - }, - "file" : "index/deep-100M/raft_ivf_flat/nlist100K", - "search_params" : [ - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ], - "search_result_file" : "result/deep-100M/raft_ivf_flat/nlist100K" + "name": "raft_cagra.dim32", + "algo": "raft_cagra", + "dataset_memtype":"host", + "build_param": {"index_dim": 32, "intermediate_graph_degree": 48}, + "file": "deep-100M/raft_cagra/dim32", + "search_params": [ + {"itopk": 32, "search_width": 1, "max_iterations": 0, "algo": "single_cta"}, + {"itopk": 32, "search_width": 1, "max_iterations": 32, "algo": "single_cta"}, + {"itopk": 64, "search_width": 4, "max_iterations": 16, "algo": "single_cta"}, + {"itopk": 64, "search_width": 1, "max_iterations": 64, "algo": "single_cta"}, + {"itopk": 96, "search_width": 2, "max_iterations": 48, "algo": "single_cta"}, + {"itopk": 128, "search_width": 8, "max_iterations": 16, "algo": "single_cta"}, + {"itopk": 128, "search_width": 2, "max_iterations": 64, "algo": "single_cta"}, + {"itopk": 192, "search_width": 8, "max_iterations": 24, "algo": "single_cta"}, + {"itopk": 192, "search_width": 2, "max_iterations": 96, "algo": "single_cta"}, + {"itopk": 256, "search_width": 8, "max_iterations": 32, "algo": "single_cta"}, + {"itopk": 384, "search_width": 8, "max_iterations": 48, "algo": "single_cta"}, + {"itopk": 512, "search_width": 8, "max_iterations": 64, "algo": "single_cta"} + ] }, - { - "name" : "raft_ivf_pq.nlist10K", - "algo" : "raft_ivf_pq", - "build_param": { - "nlist" : 10000, - "niter" : 25, - "ratio" : 5 - }, - "file" : "index/deep-100M/raft_ivf_pq/nlist10K", - "search_params" : [ - 
{"nprobe":3}, - {"nprobe":10}, - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ], - "search_result_file" : "result/deep-100M/raft_ivf_pq/nlist10K" - }, + "name": "raft_cagra.dim32.multi_cta", + "algo": "raft_cagra", + "dataset_memtype":"host", + "build_param": {"index_dim": 32, "intermediate_graph_degree": 48}, + "file": "deep-100M/raft_cagra/dim32", + "search_params": [ + {"itopk": 32, "search_width": 1, "max_iterations": 0, "algo": "multi_cta"}, + {"itopk": 32, "search_width": 1, "max_iterations": 32, "algo": "multi_cta"}, + {"itopk": 64, "search_width": 4, "max_iterations": 16, "algo": "multi_cta"}, + {"itopk": 64, "search_width": 1, "max_iterations": 64, "algo": "multi_cta"}, + {"itopk": 96, "search_width": 2, "max_iterations": 48, "algo": "multi_cta"}, + {"itopk": 128, "search_width": 8, "max_iterations": 16, "algo": "multi_cta"}, + {"itopk": 128, "search_width": 2, "max_iterations": 64, "algo": "multi_cta"}, + {"itopk": 192, "search_width": 8, "max_iterations": 24, "algo": "multi_cta"}, + {"itopk": 192, "search_width": 2, "max_iterations": 96, "algo": "multi_cta"}, + {"itopk": 256, "search_width": 8, "max_iterations": 32, "algo": "multi_cta"}, + {"itopk": 384, "search_width": 8, "max_iterations": 48, "algo": "multi_cta"}, + {"itopk": 512, "search_width": 8, "max_iterations": 64, "algo": "multi_cta"} + ] { - "name" : "raft_ivf_pq.nlist10Kdim64", - "algo" : "raft_ivf_pq", - "build_param": { - "nlist" : 10000, - "niter" : 25, - "ratio" : 5, - "pq_dim": 64 - }, - "file" : "index/deep-100M/raft_ivf_pq/nlist10Kdim64", - "search_params" : [ - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ], - "search_result_file" : "result/deep-100M/raft_ivf_pq/nlist10Kdim64" + "name": "raft_cagra.dim32.multi_kernel", + "algo": "raft_cagra", + "dataset_memtype":"host", + "build_param": {"index_dim": 32, "intermediate_graph_degree": 48}, + "file": "deep-100M/raft_cagra/dim32", + "search_params": [ + {"itopk": 32, "search_width": 1, "max_iterations": 0, "algo": "multi_kernel"}, + {"itopk": 32, "search_width": 1, "max_iterations": 32, "algo": "multi_kernel"}, + {"itopk": 64, "search_width": 4, "max_iterations": 16, "algo": "multi_kernel"}, + {"itopk": 64, "search_width": 1, "max_iterations": 64, "algo": "multi_kernel"}, + {"itopk": 96, "search_width": 2, "max_iterations": 48, "algo": "multi_kernel"}, + {"itopk": 128, "search_width": 8, "max_iterations": 16, "algo": "multi_kernel"}, + {"itopk": 128, "search_width": 2, "max_iterations": 64, "algo": "multi_kernel"}, + {"itopk": 192, "search_width": 8, "max_iterations": 24, "algo": "multi_kernel"}, + {"itopk": 192, "search_width": 2, "max_iterations": 96, "algo": "multi_kernel"}, + {"itopk": 256, "search_width": 8, "max_iterations": 32, "algo": "multi_kernel"}, + {"itopk": 384, "search_width": 8, "max_iterations": 48, "algo": "multi_kernel"}, + {"itopk": 512, "search_width": 8, "max_iterations": 64, "algo": "multi_kernel"} + ] }, { - "name" : "raft_ivf_pq.nlist10Kdim32", - "algo" : "raft_ivf_pq", - "build_param": { - "nlist" : 10000, - "niter" : 25, - "ratio" : 5, - "pq_dim": 32 - }, - "file" : "index/deep-100M/raft_ivf_pq/nlist10Kdim32", - "search_params" : [ - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ], - 
"search_result_file" : "result/deep-100M/raft_ivf_pq/nlist10Kdim32" - }, - { - "name" : "raft_ivf_pq.nlist100K", - "algo" : "raft_ivf_pq", - "build_param": { - "nlist" : 100000, - "niter" : 25, - "ratio" : 5 - }, - "file" : "index/deep-100M/raft_ivf_pq/nlist100K", - "search_params" : [ - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ], - "search_result_file" : "result/deep-100M/raft_ivf_pq/nlist100K" - }, - { - "name" : "raft_ivf_pq.nlist100Kdim64", - "algo" : "raft_ivf_pq", - "build_param": { - "nlist" : 100000, - "niter" : 25, - "ratio" : 5, - "pq_dim": 64 - }, - "file" : "index/deep-100M/raft_ivf_pq/nlist100Kdim64", - "search_params" : [ - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ], - "search_result_file" : "result/deep-100M/raft_ivf_pq/nlist100Kdim64" - }, - { - "name" : "raft_ivf_pq.nlist100Kdim32", - "algo" : "raft_ivf_pq", - "build_param": { - "nlist" : 100000, - "niter" : 25, - "ratio" : 5, - "pq_dim": 32 - }, - "file" : "index/deep-100M/raft_ivf_pq/nlist100Kdim32", - "search_params" : [ - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ], - "search_result_file" : "result/deep-100M/raft_ivf_pq/nlist100Kdim32" - }, - - { - "name" : "raft_cagra.dim32", - "algo" : "raft_cagra", - "build_param": { - "index_dim": 32, - "intermediate_graph_degree": 48 - }, - "file": "index/deep-100M/raft_cagra/dim32", - "search_params" : [ - { - "itopk": 32, - "search_width": 1, - "max_iterations": 0, - "algo": "single_cta" - }, - { - "itopk": 32, - "search_width": 1, - "max_iterations": 32, - "algo": "single_cta" - }, - { - "itopk": 64, - "search_width": 4, - "max_iterations": 16, - "algo": "single_cta" - }, - { - "itopk": 64, - "search_width": 1, - "max_iterations": 64, - "algo": "single_cta" - }, - { - "itopk": 96, - "search_width": 2, - "max_iterations": 48, - "algo": "single_cta" - }, - { - "itopk": 128, - "search_width": 8, - "max_iterations": 16, - "algo": "single_cta" - }, - { - "itopk": 128, - "search_width": 2, - "max_iterations": 64, - "algo": "single_cta" - }, - { - "itopk": 192, - "search_width": 8, - "max_iterations": 24, - "algo": "single_cta" - }, - { - "itopk": 192, - "search_width": 2, - "max_iterations": 96, - "algo": "single_cta" - }, - { - "itopk": 256, - "search_width": 8, - "max_iterations": 32, - "algo": "single_cta" - }, - { - "itopk": 384, - "search_width": 8, - "max_iterations": 48, - "algo": "single_cta" - }, - { - "itopk": 512, - "search_width": 8, - "max_iterations": 64, - "algo": "single_cta" - }, - - { - "itopk": 32, - "search_width": 1, - "max_iterations": 0, - "algo": "multi_cta" - }, - { - "itopk": 32, - "search_width": 1, - "max_iterations": 32, - "algo": "multi_cta" - }, - { - "itopk": 64, - "search_width": 4, - "max_iterations": 16, - "algo": "multi_cta" - }, - { - "itopk": 64, - "search_width": 1, - "max_iterations": 64, - "algo": "multi_cta" - }, - { - "itopk": 96, - "search_width": 2, - "max_iterations": 48, - "algo": "multi_cta" - }, - { - "itopk": 128, - "search_width": 8, - "max_iterations": 16, - "algo": "multi_cta" - }, - { - "itopk": 128, - "search_width": 2, - "max_iterations": 64, - "algo": "multi_cta" - }, - { - "itopk": 192, - "search_width": 8, - "max_iterations": 24, - 
"algo": "multi_cta" - }, - { - "itopk": 192, - "search_width": 2, - "max_iterations": 96, - "algo": "multi_cta" - }, - { - "itopk": 256, - "search_width": 8, - "max_iterations": 32, - "algo": "multi_cta" - }, - { - "itopk": 384, - "search_width": 8, - "max_iterations": 48, - "algo": "multi_cta" - }, - { - "itopk": 512, - "search_width": 8, - "max_iterations": 64, - "algo": "multi_cta" - }, - - { - "itopk": 32, - "search_width": 1, - "max_iterations": 0, - "algo": "multi_kernel" - }, - { - "itopk": 32, - "search_width": 1, - "max_iterations": 32, - "algo": "multi_kernel" - }, - { - "itopk": 64, - "search_width": 4, - "max_iterations": 16, - "algo": "multi_kernel" - }, - { - "itopk": 64, - "search_width": 1, - "max_iterations": 64, - "algo": "multi_kernel" - }, - { - "itopk": 96, - "search_width": 2, - "max_iterations": 48, - "algo": "multi_kernel" - }, - { - "itopk": 128, - "search_width": 8, - "max_iterations": 16, - "algo": "multi_kernel" - }, - { - "itopk": 128, - "search_width": 2, - "max_iterations": 64, - "algo": "multi_kernel" - }, - { - "itopk": 192, - "search_width": 8, - "max_iterations": 24, - "algo": "multi_kernel" - }, - { - "itopk": 192, - "search_width": 2, - "max_iterations": 96, - "algo": "multi_kernel" - }, - { - "itopk": 256, - "search_width": 8, - "max_iterations": 32, - "algo": "multi_kernel" - }, - { - "itopk": 384, - "search_width": 8, - "max_iterations": 48, - "algo": "multi_kernel" - }, - { - "itopk": 512, - "search_width": 8, - "max_iterations": 64, - "algo": "multi_kernel" - } - ], - "search_result_file": "result/deep-100M/raft_cagra/dim32" - }, - { - "name" : "raft_cagra.dim64", - "algo" : "raft_cagra", - "build_param": { - "index_dim": 64 - }, - "file": "index/deep-100M/raft_cagra/dim64", - "search_params" : [ - { - "itopk": 32, - "search_width": 1, - "max_iterations": 0, - "algo": "single_cta" - }, - { - "itopk": 32, - "search_width": 1, - "max_iterations": 32, - "algo": "single_cta" - }, - { - "itopk": 64, - "search_width": 4, - "max_iterations": 16, - "algo": "single_cta" - }, - { - "itopk": 64, - "search_width": 1, - "max_iterations": 64, - "algo": "single_cta" - }, - { - "itopk": 96, - "search_width": 2, - "max_iterations": 48, - "algo": "single_cta" - }, - { - "itopk": 128, - "search_width": 8, - "max_iterations": 16, - "algo": "single_cta" - }, - { - "itopk": 128, - "search_width": 2, - "max_iterations": 64, - "algo": "single_cta" - }, - { - "itopk": 192, - "search_width": 8, - "max_iterations": 24, - "algo": "single_cta" - }, - { - "itopk": 192, - "search_width": 2, - "max_iterations": 96, - "algo": "single_cta" - }, - { - "itopk": 256, - "search_width": 8, - "max_iterations": 32, - "algo": "single_cta" - }, - { - "itopk": 384, - "search_width": 8, - "max_iterations": 48, - "algo": "single_cta" - }, - { - "itopk": 512, - "search_width": 8, - "max_iterations": 64, - "algo": "single_cta" - }, - - { - "itopk": 32, - "search_width": 1, - "max_iterations": 0, - "algo": "multi_cta" - }, - { - "itopk": 32, - "search_width": 1, - "max_iterations": 32, - "algo": "multi_cta" - }, - { - "itopk": 64, - "search_width": 4, - "max_iterations": 16, - "algo": "multi_cta" - }, - { - "itopk": 64, - "search_width": 1, - "max_iterations": 64, - "algo": "multi_cta" - }, - { - "itopk": 96, - "search_width": 2, - "max_iterations": 48, - "algo": "multi_cta" - }, - { - "itopk": 128, - "search_width": 8, - "max_iterations": 16, - "algo": "multi_cta" - }, - { - "itopk": 128, - "search_width": 2, - "max_iterations": 64, - "algo": "multi_cta" - }, - { - "itopk": 192, - 
"search_width": 8, - "max_iterations": 24, - "algo": "multi_cta" - }, - { - "itopk": 192, - "search_width": 2, - "max_iterations": 96, - "algo": "multi_cta" - }, - { - "itopk": 256, - "search_width": 8, - "max_iterations": 32, - "algo": "multi_cta" - }, - { - "itopk": 384, - "search_width": 8, - "max_iterations": 48, - "algo": "multi_cta" - }, - { - "itopk": 512, - "search_width": 8, - "max_iterations": 64, - "algo": "multi_cta" - }, - - { - "itopk": 32, - "search_width": 1, - "max_iterations": 0, - "algo": "multi_kernel" - }, - { - "itopk": 32, - "search_width": 1, - "max_iterations": 32, - "algo": "multi_kernel" - }, - { - "itopk": 64, - "search_width": 4, - "max_iterations": 16, - "algo": "multi_kernel" - }, - { - "itopk": 64, - "search_width": 1, - "max_iterations": 64, - "algo": "multi_kernel" - }, - { - "itopk": 96, - "search_width": 2, - "max_iterations": 48, - "algo": "multi_kernel" - }, - { - "itopk": 128, - "search_width": 8, - "max_iterations": 16, - "algo": "multi_kernel" - }, - { - "itopk": 128, - "search_width": 2, - "max_iterations": 64, - "algo": "multi_kernel" - }, - { - "itopk": 192, - "search_width": 8, - "max_iterations": 24, - "algo": "multi_kernel" - }, - { - "itopk": 192, - "search_width": 2, - "max_iterations": 96, - "algo": "multi_kernel" - }, - { - "itopk": 256, - "search_width": 8, - "max_iterations": 32, - "algo": "multi_kernel" - }, - { - "itopk": 384, - "search_width": 8, - "max_iterations": 48, - "algo": "multi_kernel" - }, - { - "itopk": 512, - "search_width": 8, - "max_iterations": 64, - "algo": "multi_kernel" - } - ], - "search_result_file": "result/deep-100M/raft_cagra/dim64" + "name": "raft_cagra.dim64", + "algo": "raft_cagra", + "dataset_memtype":"host", + "build_param": {"index_dim": 64}, + "file": "deep-100M/raft_cagra/dim64", + "search_params": [ + {"itopk": 32, "search_width": 1, "max_iterations": 0}, + {"itopk": 32, "search_width": 1, "max_iterations": 32}, + {"itopk": 64, "search_width": 4, "max_iterations": 16}, + {"itopk": 64, "search_width": 1, "max_iterations": 64}, + {"itopk": 96, "search_width": 2, "max_iterations": 48}, + {"itopk": 128, "search_width": 8, "max_iterations": 16}, + {"itopk": 128, "search_width": 2, "max_iterations": 64}, + {"itopk": 192, "search_width": 8, "max_iterations": 24}, + {"itopk": 192, "search_width": 2, "max_iterations": 96}, + {"itopk": 256, "search_width": 8, "max_iterations": 32}, + {"itopk": 384, "search_width": 8, "max_iterations": 48}, + {"itopk": 512, "search_width": 8, "max_iterations": 64} + ] } ] } diff --git a/python/raft-ann-bench/raft-ann-bench/run/conf/deep-1B.json b/python/raft-ann-bench/raft-ann-bench/run/conf/deep-1B.json index 50d1b87602..632d2f7308 100644 --- a/python/raft-ann-bench/raft-ann-bench/run/conf/deep-1B.json +++ b/python/raft-ann-bench/raft-ann-bench/run/conf/deep-1B.json @@ -1,25 +1,24 @@ { - "dataset" : { - "name" : "deep-1B", - "base_file" : "data/deep-1B/base.1B.fbin", - "query_file" : "data/deep-1B/query.public.10K.fbin", - // although distance should be "euclidean", faiss becomes much slower for that - "distance" : "inner_product" + "dataset": { + "name": "deep-1B", + "base_file": "deep-1B/base.1B.fbin", + "query_file": "deep-1B/query.public.10K.fbin", + "groundtruth_neighbors_file": "deep-1B/groundtruth.neighbors.ibin", + "distance": "inner_product" }, - "search_basic_param" : { - "batch_size" : 10000, - "k" : 10, - "run_count" : 2 + "search_basic_param": { + "batch_size": 10000, + "k": 10 }, - "index" : [ + "index": [ { - "name" : "faiss_ivf_pq.M48-nlist50K", - "algo" : 
"faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M48-nlist50K", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":50000, "M":48}, - "file" : "index/deep-1B/faiss_ivf_pq/M48-nlist50K", - "search_params" : [ + "file": "deep-1B/faiss_ivf_pq/M48-nlist50K", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -29,10 +28,7 @@ {"nprobe":500}, {"nprobe":1000}, {"nprobe":2000} - ], - "search_result_file" : "result/deep-1B/faiss_ivf_pq/M48-nlist50K" - }, - - + ] + } ] } diff --git a/python/raft-ann-bench/raft-ann-bench/run/conf/deep-image-96-angular.json b/python/raft-ann-bench/raft-ann-bench/run/conf/deep-image-96-angular.json new file mode 100644 index 0000000000..4467e09dab --- /dev/null +++ b/python/raft-ann-bench/raft-ann-bench/run/conf/deep-image-96-angular.json @@ -0,0 +1,1366 @@ +{ + "dataset": { + "name": "deep-image-96-angular", + "base_file": "data/deep-image-96-angular/base.fbin", + "query_file": "data/deep-image-96-angular/query.fbin", + "distance": "euclidean" + }, + "search_basic_param": { + "batch_size": 5000, + "k": 10, + "run_count": 3 + }, + "index": [ + { + "name" : "hnswlib.M12", + "algo" : "hnswlib", + "build_param": {"M":12, "efConstruction":500, "numThreads":32}, + "file" : "index/deep-image-96-angular/hnswlib/M12", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/deep-image-96-angular/hnswlib/M12" + }, + { + "name" : "hnswlib.M16", + "algo" : "hnswlib", + "build_param": {"M":16, "efConstruction":500, "numThreads":32}, + "file" : "index/deep-image-96-angular/hnswlib/M16", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/deep-image-96-angular/hnswlib/M16" + }, + { + "name" : "hnswlib.M24", + "algo" : "hnswlib", + "build_param": {"M":24, "efConstruction":500, "numThreads":32}, + "file" : "index/deep-image-96-angular/hnswlib/M24", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/deep-image-96-angular/hnswlib/M24" + }, + { + "name" : "hnswlib.M36", + "algo" : "hnswlib", + "build_param": {"M":36, "efConstruction":500, "numThreads":32}, + "file" : "index/deep-image-96-angular/hnswlib/M36", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/deep-image-96-angular/hnswlib/M36" + }, + + + + + { + "name": "raft_bfknn", + "algo": "raft_bfknn", + "dataset_memtype": "device", + "build_param": {}, + "file": "index/deep-image-96-angular/raft_bfknn/bfknn", + "search_params": [ + { + 
"probe": 1 + } + ], + "search_result_file": "result/deep-image-96-angular/raft_bfknn/bfknn" + }, + { + "name": "faiss_ivf_flat.nlist1024", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 1024 + }, + "file": "index/deep-image-96-angular/faiss_ivf_flat/nlist1024", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/deep-image-96-angular/faiss_ivf_flat/nlist1024" + }, + { + "name": "faiss_ivf_flat.nlist2048", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 2048 + }, + "file": "index/deep-image-96-angular/faiss_ivf_flat/nlist2048", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/deep-image-96-angular/faiss_ivf_flat/nlist2048" + }, + { + "name": "faiss_ivf_flat.nlist4096", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 4096 + }, + "file": "index/deep-image-96-angular/faiss_ivf_flat/nlist4096", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/deep-image-96-angular/faiss_ivf_flat/nlist4096" + }, + { + "name": "faiss_ivf_flat.nlist8192", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 8192 + }, + "file": "index/deep-image-96-angular/faiss_ivf_flat/nlist8192", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/deep-image-96-angular/faiss_ivf_flat/nlist8192" + }, + { + "name": "faiss_ivf_flat.nlist16384", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 16384 + }, + "file": "index/deep-image-96-angular/faiss_ivf_flat/nlist16384", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/deep-image-96-angular/faiss_ivf_flat/nlist16384" + }, + { + "name": "faiss_ivf_pq.M64-nlist1024", + "algo": "faiss_gpu_ivf_pq", + "build_param": { + "nlist": 1024, + "M": 64, + "useFloat16": true, + "usePrecomputed": true + }, + "file": "index/deep-image-96-angular/faiss_ivf_pq/M64-nlist1024", + "search_params": [ + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/deep-image-96-angular/faiss_ivf_pq/M64-nlist1024" + }, + { + "name": "faiss_ivf_pq.M64-nlist1024.noprecomp", + "algo": "faiss_gpu_ivf_pq", + "build_param": { + "nlist": 1024, + "M": 64, + "useFloat16": true, + "usePrecomputed": false + }, + "file": "index/deep-image-96-angular/faiss_ivf_pq/M64-nlist1024.noprecomp", + "search_params": [ + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": 
"result/deep-image-96-angular/faiss_ivf_pq/M64-nlist1024" + }, + { + "name": "faiss_ivf_sq.nlist1024-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 1024, + "quantizer_type": "fp16" + }, + "file": "index/deep-image-96-angular/faiss_ivf_sq/nlist1024-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/deep-image-96-angular/faiss_ivf_sq/nlist1024-fp16" + }, + { + "name": "faiss_ivf_sq.nlist2048-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 2048, + "quantizer_type": "fp16" + }, + "file": "index/deep-image-96-angular/faiss_ivf_sq/nlist2048-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/deep-image-96-angular/faiss_ivf_sq/nlist2048-fp16" + }, + { + "name": "faiss_ivf_sq.nlist4096-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 4096, + "quantizer_type": "fp16" + }, + "file": "index/deep-image-96-angular/faiss_ivf_sq/nlist4096-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/deep-image-96-angular/faiss_ivf_sq/nlist4096-fp16" + }, + { + "name": "faiss_ivf_sq.nlist8192-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 8192, + "quantizer_type": "fp16" + }, + "file": "index/deep-image-96-angular/faiss_ivf_sq/nlist8192-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/deep-image-96-angular/faiss_ivf_sq/nlist8192-fp16" + }, + { + "name": "faiss_ivf_sq.nlist16384-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 16384, + "quantizer_type": "fp16" + }, + "file": "index/deep-image-96-angular/faiss_ivf_sq/nlist16384-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/deep-image-96-angular/faiss_ivf_sq/nlist16384-fp16" + }, + { + "name": "faiss_ivf_sq.nlist1024-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 1024, + "quantizer_type": "int8" + }, + "file": "index/deep-image-96-angular/faiss_ivf_sq/nlist1024-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/deep-image-96-angular/faiss_ivf_sq/nlist1024-int8" + }, + { + "name": "faiss_ivf_sq.nlist2048-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 2048, + "quantizer_type": "int8" + }, + "file": "index/deep-image-96-angular/faiss_ivf_sq/nlist2048-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + 
"nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/deep-image-96-angular/faiss_ivf_sq/nlist2048-int8" + }, + { + "name": "faiss_ivf_sq.nlist4096-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 4096, + "quantizer_type": "int8" + }, + "file": "index/deep-image-96-angular/faiss_ivf_sq/nlist4096-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/deep-image-96-angular/faiss_ivf_sq/nlist4096-int8" + }, + { + "name": "faiss_ivf_sq.nlist8192-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 8192, + "quantizer_type": "int8" + }, + "file": "index/deep-image-96-angular/faiss_ivf_sq/nlist8192-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/deep-image-96-angular/faiss_ivf_sq/nlist8192-int8" + }, + { + "name": "faiss_ivf_sq.nlist16384-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 16384, + "quantizer_type": "int8" + }, + "file": "index/deep-image-96-angular/faiss_ivf_sq/nlist16384-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/deep-image-96-angular/faiss_ivf_sq/nlist16384-int8" + }, + { + "name": "faiss_flat", + "algo": "faiss_gpu_flat", + "build_param": {}, + "file": "index/deep-image-96-angular/faiss_flat/flat", + "search_params": [ + {} + ], + "search_result_file": "result/deep-image-96-angular/faiss_flat/flat" + }, + + { + "name": "raft_ivf_pq.dimpq128-cluster1024", + "algo": "raft_ivf_pq", + "dataset_memtype": "device", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", + "algo": "raft_ivf_pq", + "dataset_memtype": "device", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-float-float", + "search_params": [ + { + "k": 10, + "numProbes": 1, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 5, + 
"internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-float-float" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", + "algo": "raft_ivf_pq", + "dataset_memtype": "device", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-float-half", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-float-half" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "dataset_memtype": "device", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "dataset_memtype": "device", + "build_param": { + "nlist": 1024, + "pq_dim": 64, + "ratio": 1, + "niter": 25 + }, + "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 
500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", + "algo": "raft_ivf_pq", + "dataset_memtype": "device", + "build_param": { + "nlist": 1024, + "pq_dim": 64, + "ratio": 1, + "niter": 25 + }, + "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq64-cluster1024-float-half", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq64-cluster1024-float-half" + }, + { + "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "dataset_memtype": "device", + "build_param": { + "nlist": 1024, + "pq_dim": 32, + "ratio": 1, + "niter": 25 + }, + "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "dataset_memtype": "device", + "build_param": { + "nlist": 1024, + "pq_dim": 16, + "ratio": 1, + "niter": 25 + }, + "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", + "algo": "raft_ivf_pq", + "dataset_memtype": "device", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": 
"index/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-half-float", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-half-float" + }, + { + "name": "raft_ivf_pq.dimpq512-cluster1024-float-float", + "algo": "raft_ivf_pq", + "dataset_memtype": "device", + "build_param": { + "nlist": 1024, + "pq_dim": 512, + "ratio": 1, + "niter": 25 + }, + "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq512-cluster1024-float-float", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq512-cluster1024-float-float" + }, + { + "name": "raft_ivf_flat.nlist1024", + "algo": "raft_ivf_flat", + "dataset_memtype": "device", + "build_param": { + "nlist": 1024, + "ratio": 1, + "niter": 25 + }, + "file": "index/deep-image-96-angular/raft_ivf_flat/nlist1024", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/deep-image-96-angular/raft_ivf_flat/nlist1024" + }, + { + "name": "raft_ivf_flat.nlist16384", + "algo": "raft_ivf_flat", + "dataset_memtype": "device", + "build_param": { + "nlist": 16384, + "ratio": 2, + "niter": 20 + }, + "file": "index/deep-image-96-angular/raft_ivf_flat/nlist16384", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/deep-image-96-angular/raft_ivf_flat/nlist16384" + }, + + { + "name" : "raft_cagra.dim32", + "algo" : "raft_cagra", + "dataset_memtype": "device", + "build_param": { + "index_dim" : 32 + }, + "file" : "index/deep-image-96-angular/raft_cagra/dim32", + "search_params" : [ + {"itopk": 32}, + {"itopk": 64}, + {"itopk": 128} + ], + "search_result_file" : "result/deep-image-96-angular/raft_cagra/dim32" + }, + + { + "name" : "raft_cagra.dim64", + "algo" : "raft_cagra", + "dataset_memtype": "device", + "build_param": { + "index_dim" : 64 + }, + "file" : "index/deep-image-96-angular/raft_cagra/dim64", + "search_params" : [ + {"itopk": 32}, + {"itopk": 64}, + 
{"itopk": 128} + ], + "search_result_file" : "result/deep-image-96-angular/raft_cagra/dim64" + } + ] +} diff --git a/python/raft-ann-bench/raft-ann-bench/run/conf/fashion-mnist-784-euclidean.json b/python/raft-ann-bench/raft-ann-bench/run/conf/fashion-mnist-784-euclidean.json new file mode 100644 index 0000000000..5a0713ca0b --- /dev/null +++ b/python/raft-ann-bench/raft-ann-bench/run/conf/fashion-mnist-784-euclidean.json @@ -0,0 +1,1366 @@ +{ + "dataset": { + "name": "fashion-mnist-784-euclidean", + "base_file": "data/fashion-mnist-784-euclidean/base.fbin", + "query_file": "data/fashion-mnist-784-euclidean/query.fbin", + "distance": "euclidean" + }, + "search_basic_param": { + "batch_size": 5000, + "k": 10, + "run_count": 3 + }, + "index": [ + { + "name" : "hnswlib.M12", + "algo" : "hnswlib", + "build_param": {"M":12, "efConstruction":500, "numThreads":32}, + "file" : "index/fashion-mnist-784-euclidean/hnswlib/M12", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/fashion-mnist-784-euclidean/hnswlib/M12" + }, + { + "name" : "hnswlib.M16", + "algo" : "hnswlib", + "build_param": {"M":16, "efConstruction":500, "numThreads":32}, + "file" : "index/fashion-mnist-784-euclidean/hnswlib/M16", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/fashion-mnist-784-euclidean/hnswlib/M16" + }, + { + "name" : "hnswlib.M24", + "algo" : "hnswlib", + "build_param": {"M":24, "efConstruction":500, "numThreads":32}, + "file" : "index/fashion-mnist-784-euclidean/hnswlib/M24", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/fashion-mnist-784-euclidean/hnswlib/M24" + }, + { + "name" : "hnswlib.M36", + "algo" : "hnswlib", + "build_param": {"M":36, "efConstruction":500, "numThreads":32}, + "file" : "index/fashion-mnist-784-euclidean/hnswlib/M36", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/fashion-mnist-784-euclidean/hnswlib/M36" + }, + + + + + { + "name": "raft_bfknn", + "algo": "raft_bfknn", + "dataset_memtype": "device", + "build_param": {}, + "file": "index/fashion-mnist-784-euclidean/raft_bfknn/bfknn", + "search_params": [ + { + "probe": 1 + } + ], + "search_result_file": "result/fashion-mnist-784-euclidean/raft_bfknn/bfknn" + }, + { + "name": "faiss_ivf_flat.nlist1024", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 1024 + }, + "file": 
"index/fashion-mnist-784-euclidean/faiss_ivf_flat/nlist1024", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_flat/nlist1024" + }, + { + "name": "faiss_ivf_flat.nlist2048", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 2048 + }, + "file": "index/fashion-mnist-784-euclidean/faiss_ivf_flat/nlist2048", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_flat/nlist2048" + }, + { + "name": "faiss_ivf_flat.nlist4096", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 4096 + }, + "file": "index/fashion-mnist-784-euclidean/faiss_ivf_flat/nlist4096", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_flat/nlist4096" + }, + { + "name": "faiss_ivf_flat.nlist8192", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 8192 + }, + "file": "index/fashion-mnist-784-euclidean/faiss_ivf_flat/nlist8192", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_flat/nlist8192" + }, + { + "name": "faiss_ivf_flat.nlist16384", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 16384 + }, + "file": "index/fashion-mnist-784-euclidean/faiss_ivf_flat/nlist16384", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_flat/nlist16384" + }, + { + "name": "faiss_ivf_pq.M64-nlist1024", + "algo": "faiss_gpu_ivf_pq", + "build_param": { + "nlist": 1024, + "M": 64, + "useFloat16": true, + "usePrecomputed": true + }, + "file": "index/fashion-mnist-784-euclidean/faiss_ivf_pq/M64-nlist1024", + "search_params": [ + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_pq/M64-nlist1024" + }, + { + "name": "faiss_ivf_pq.M64-nlist1024.noprecomp", + "algo": "faiss_gpu_ivf_pq", + "build_param": { + "nlist": 1024, + "M": 64, + "useFloat16": true, + "usePrecomputed": false + }, + "file": "index/fashion-mnist-784-euclidean/faiss_ivf_pq/M64-nlist1024.noprecomp", + "search_params": [ + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_pq/M64-nlist1024" + }, + { + "name": "faiss_ivf_sq.nlist1024-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 1024, + 
"quantizer_type": "fp16" + }, + "file": "index/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist1024-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist1024-fp16" + }, + { + "name": "faiss_ivf_sq.nlist2048-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 2048, + "quantizer_type": "fp16" + }, + "file": "index/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist2048-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist2048-fp16" + }, + { + "name": "faiss_ivf_sq.nlist4096-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 4096, + "quantizer_type": "fp16" + }, + "file": "index/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist4096-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist4096-fp16" + }, + { + "name": "faiss_ivf_sq.nlist8192-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 8192, + "quantizer_type": "fp16" + }, + "file": "index/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist8192-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist8192-fp16" + }, + { + "name": "faiss_ivf_sq.nlist16384-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 16384, + "quantizer_type": "fp16" + }, + "file": "index/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist16384-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist16384-fp16" + }, + { + "name": "faiss_ivf_sq.nlist1024-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 1024, + "quantizer_type": "int8" + }, + "file": "index/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist1024-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist1024-int8" + }, + { + "name": "faiss_ivf_sq.nlist2048-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 2048, + "quantizer_type": "int8" + }, + "file": "index/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist2048-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + 
"search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist2048-int8" + }, + { + "name": "faiss_ivf_sq.nlist4096-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 4096, + "quantizer_type": "int8" + }, + "file": "index/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist4096-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist4096-int8" + }, + { + "name": "faiss_ivf_sq.nlist8192-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 8192, + "quantizer_type": "int8" + }, + "file": "index/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist8192-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist8192-int8" + }, + { + "name": "faiss_ivf_sq.nlist16384-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 16384, + "quantizer_type": "int8" + }, + "file": "index/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist16384-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist16384-int8" + }, + { + "name": "faiss_flat", + "algo": "faiss_gpu_flat", + "build_param": {}, + "file": "index/fashion-mnist-784-euclidean/faiss_flat/flat", + "search_params": [ + {} + ], + "search_result_file": "result/fashion-mnist-784-euclidean/faiss_flat/flat" + }, + + { + "name": "raft_ivf_pq.dimpq128-cluster1024", + "algo": "raft_ivf_pq", + "dataset_memtype": "device", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", + "algo": "raft_ivf_pq", + "dataset_memtype": "device", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-float", + "search_params": [ + { + "k": 10, + "numProbes": 1, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 5, + "internalDistanceDtype": "float", 
+ "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-float" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", + "algo": "raft_ivf_pq", + "dataset_memtype": "device", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-half", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-half" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "dataset_memtype": "device", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "dataset_memtype": "device", + "build_param": { + "nlist": 1024, + "pq_dim": 64, + "ratio": 1, + "niter": 25 + }, + "file": "index/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 
500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", + "algo": "raft_ivf_pq", + "dataset_memtype": "device", + "build_param": { + "nlist": 1024, + "pq_dim": 64, + "ratio": 1, + "niter": 25 + }, + "file": "index/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-half", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-half" + }, + { + "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "dataset_memtype": "device", + "build_param": { + "nlist": 1024, + "pq_dim": 32, + "ratio": 1, + "niter": 25 + }, + "file": "index/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq32-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq32-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "dataset_memtype": "device", + "build_param": { + "nlist": 1024, + "pq_dim": 16, + "ratio": 1, + "niter": 25 + }, + "file": "index/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq16-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq16-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", + "algo": "raft_ivf_pq", + "dataset_memtype": "device", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + 
}, + "file": "index/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-half-float", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-half-float" + }, + { + "name": "raft_ivf_pq.dimpq512-cluster1024-float-float", + "algo": "raft_ivf_pq", + "dataset_memtype": "device", + "build_param": { + "nlist": 1024, + "pq_dim": 512, + "ratio": 1, + "niter": 25 + }, + "file": "index/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq512-cluster1024-float-float", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq512-cluster1024-float-float" + }, + { + "name": "raft_ivf_flat.nlist1024", + "algo": "raft_ivf_flat", + "dataset_memtype": "device", + "build_param": { + "nlist": 1024, + "ratio": 1, + "niter": 25 + }, + "file": "index/fashion-mnist-784-euclidean/raft_ivf_flat/nlist1024", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/fashion-mnist-784-euclidean/raft_ivf_flat/nlist1024" + }, + { + "name": "raft_ivf_flat.nlist16384", + "algo": "raft_ivf_flat", + "dataset_memtype": "device", + "build_param": { + "nlist": 16384, + "ratio": 2, + "niter": 20 + }, + "file": "index/fashion-mnist-784-euclidean/raft_ivf_flat/nlist16384", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/fashion-mnist-784-euclidean/raft_ivf_flat/nlist16384" + }, + + { + "name" : "raft_cagra.dim32", + "algo" : "raft_cagra", + "dataset_memtype": "device", + "build_param": { + "index_dim" : 32 + }, + "file" : "index/fashion-mnist-784-euclidean/raft_cagra/dim32", + "search_params" : [ + {"itopk": 32}, + {"itopk": 64}, + {"itopk": 128} + ], + "search_result_file" : "result/fashion-mnist-784-euclidean/raft_cagra/dim32" + }, + + { + "name" : "raft_cagra.dim64", + "algo" : "raft_cagra", + "dataset_memtype": "device", + "build_param": { + "index_dim" : 64 + }, + "file" : 
"index/fashion-mnist-784-euclidean/raft_cagra/dim64", + "search_params" : [ + {"itopk": 32}, + {"itopk": 64}, + {"itopk": 128} + ], + "search_result_file" : "result/fashion-mnist-784-euclidean/raft_cagra/dim64" + } + ] +} diff --git a/python/raft-ann-bench/raft-ann-bench/run/conf/gist-960-euclidean.json b/python/raft-ann-bench/raft-ann-bench/run/conf/gist-960-euclidean.json new file mode 100644 index 0000000000..d03df0f486 --- /dev/null +++ b/python/raft-ann-bench/raft-ann-bench/run/conf/gist-960-euclidean.json @@ -0,0 +1,1351 @@ +{ + "dataset": { + "name": "gist-960-euclidean", + "base_file": "data/gist-960-euclidean/base.fbin", + "query_file": "data/gist-960-euclidean/query.fbin", + "distance": "euclidean" + }, + "search_basic_param": { + "batch_size": 5000, + "k": 10, + "run_count": 3 + }, + "index": [ + { + "name" : "hnswlib.M12", + "algo" : "hnswlib", + "build_param": {"M":12, "efConstruction":500, "numThreads":32}, + "file" : "index/gist-960-euclidean/hnswlib/M12", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/gist-960-euclidean/hnswlib/M12" + }, + { + "name" : "hnswlib.M16", + "algo" : "hnswlib", + "build_param": {"M":16, "efConstruction":500, "numThreads":32}, + "file" : "index/gist-960-euclidean/hnswlib/M16", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/gist-960-euclidean/hnswlib/M16" + }, + { + "name" : "hnswlib.M24", + "algo" : "hnswlib", + "build_param": {"M":24, "efConstruction":500, "numThreads":32}, + "file" : "index/gist-960-euclidean/hnswlib/M24", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/gist-960-euclidean/hnswlib/M24" + }, + { + "name" : "hnswlib.M36", + "algo" : "hnswlib", + "build_param": {"M":36, "efConstruction":500, "numThreads":32}, + "file" : "index/gist-960-euclidean/hnswlib/M36", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/gist-960-euclidean/hnswlib/M36" + }, + + + + + { + "name": "raft_bfknn", + "algo": "raft_bfknn", + "build_param": {}, + "file": "index/gist-960-euclidean/raft_bfknn/bfknn", + "search_params": [ + { + "probe": 1 + } + ], + "search_result_file": "result/gist-960-euclidean/raft_bfknn/bfknn" + }, + { + "name": "faiss_ivf_flat.nlist1024", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 1024 + }, + "file": "index/gist-960-euclidean/faiss_ivf_flat/nlist1024", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + 
}, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/gist-960-euclidean/faiss_ivf_flat/nlist1024" + }, + { + "name": "faiss_ivf_flat.nlist2048", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 2048 + }, + "file": "index/gist-960-euclidean/faiss_ivf_flat/nlist2048", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/gist-960-euclidean/faiss_ivf_flat/nlist2048" + }, + { + "name": "faiss_ivf_flat.nlist4096", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 4096 + }, + "file": "index/gist-960-euclidean/faiss_ivf_flat/nlist4096", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/gist-960-euclidean/faiss_ivf_flat/nlist4096" + }, + { + "name": "faiss_ivf_flat.nlist8192", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 8192 + }, + "file": "index/gist-960-euclidean/faiss_ivf_flat/nlist8192", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/gist-960-euclidean/faiss_ivf_flat/nlist8192" + }, + { + "name": "faiss_ivf_flat.nlist16384", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 16384 + }, + "file": "index/gist-960-euclidean/faiss_ivf_flat/nlist16384", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/gist-960-euclidean/faiss_ivf_flat/nlist16384" + }, + { + "name": "faiss_ivf_pq.M64-nlist1024", + "algo": "faiss_gpu_ivf_pq", + "build_param": { + "nlist": 1024, + "M": 64, + "useFloat16": true, + "usePrecomputed": true + }, + "file": "index/gist-960-euclidean/faiss_ivf_pq/M64-nlist1024", + "search_params": [ + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/gist-960-euclidean/faiss_ivf_pq/M64-nlist1024" + }, + { + "name": "faiss_ivf_pq.M64-nlist1024.noprecomp", + "algo": "faiss_gpu_ivf_pq", + "build_param": { + "nlist": 1024, + "M": 64, + "useFloat16": true, + "usePrecomputed": false + }, + "file": "index/gist-960-euclidean/faiss_ivf_pq/M64-nlist1024.noprecomp", + "search_params": [ + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/gist-960-euclidean/faiss_ivf_pq/M64-nlist1024" + }, + { + "name": "faiss_ivf_sq.nlist1024-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 1024, + "quantizer_type": "fp16" + }, + "file": "index/gist-960-euclidean/faiss_ivf_sq/nlist1024-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + 
"nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/gist-960-euclidean/faiss_ivf_sq/nlist1024-fp16" + }, + { + "name": "faiss_ivf_sq.nlist2048-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 2048, + "quantizer_type": "fp16" + }, + "file": "index/gist-960-euclidean/faiss_ivf_sq/nlist2048-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/gist-960-euclidean/faiss_ivf_sq/nlist2048-fp16" + }, + { + "name": "faiss_ivf_sq.nlist4096-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 4096, + "quantizer_type": "fp16" + }, + "file": "index/gist-960-euclidean/faiss_ivf_sq/nlist4096-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/gist-960-euclidean/faiss_ivf_sq/nlist4096-fp16" + }, + { + "name": "faiss_ivf_sq.nlist8192-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 8192, + "quantizer_type": "fp16" + }, + "file": "index/gist-960-euclidean/faiss_ivf_sq/nlist8192-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/gist-960-euclidean/faiss_ivf_sq/nlist8192-fp16" + }, + { + "name": "faiss_ivf_sq.nlist16384-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 16384, + "quantizer_type": "fp16" + }, + "file": "index/gist-960-euclidean/faiss_ivf_sq/nlist16384-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/gist-960-euclidean/faiss_ivf_sq/nlist16384-fp16" + }, + { + "name": "faiss_ivf_sq.nlist1024-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 1024, + "quantizer_type": "int8" + }, + "file": "index/gist-960-euclidean/faiss_ivf_sq/nlist1024-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/gist-960-euclidean/faiss_ivf_sq/nlist1024-int8" + }, + { + "name": "faiss_ivf_sq.nlist2048-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 2048, + "quantizer_type": "int8" + }, + "file": "index/gist-960-euclidean/faiss_ivf_sq/nlist2048-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/gist-960-euclidean/faiss_ivf_sq/nlist2048-int8" + }, + { + "name": "faiss_ivf_sq.nlist4096-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 4096, + "quantizer_type": "int8" + }, + "file": "index/gist-960-euclidean/faiss_ivf_sq/nlist4096-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + 
"nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/gist-960-euclidean/faiss_ivf_sq/nlist4096-int8" + }, + { + "name": "faiss_ivf_sq.nlist8192-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 8192, + "quantizer_type": "int8" + }, + "file": "index/gist-960-euclidean/faiss_ivf_sq/nlist8192-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/gist-960-euclidean/faiss_ivf_sq/nlist8192-int8" + }, + { + "name": "faiss_ivf_sq.nlist16384-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 16384, + "quantizer_type": "int8" + }, + "file": "index/gist-960-euclidean/faiss_ivf_sq/nlist16384-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/gist-960-euclidean/faiss_ivf_sq/nlist16384-int8" + }, + { + "name": "faiss_flat", + "algo": "faiss_gpu_flat", + "build_param": {}, + "file": "index/gist-960-euclidean/faiss_flat/flat", + "search_params": [ + {} + ], + "search_result_file": "result/gist-960-euclidean/faiss_flat/flat" + }, + + { + "name": "raft_ivf_pq.dimpq128-cluster1024", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/gist-960-euclidean/raft_ivf_pq/dimpq128-cluster1024", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/gist-960-euclidean/raft_ivf_pq/dimpq128-cluster1024" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/gist-960-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-float", + "search_params": [ + { + "k": 10, + "numProbes": 1, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 5, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + 
}, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/gist-960-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-float" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/gist-960-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-half", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/gist-960-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-half" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/gist-960-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/gist-960-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 64, + "ratio": 1, + "niter": 25 + }, + "file": "index/gist-960-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/gist-960-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 64, + "ratio": 1, + "niter": 25 + }, + "file": "index/gist-960-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-half", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + 
"internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/gist-960-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-half" + }, + { + "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 32, + "ratio": 1, + "niter": 25 + }, + "file": "index/gist-960-euclidean/raft_ivf_pq/dimpq32-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/gist-960-euclidean/raft_ivf_pq/dimpq32-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 16, + "ratio": 1, + "niter": 25 + }, + "file": "index/gist-960-euclidean/raft_ivf_pq/dimpq16-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/gist-960-euclidean/raft_ivf_pq/dimpq16-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/gist-960-euclidean/raft_ivf_pq/dimpq128-cluster1024-half-float", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/gist-960-euclidean/raft_ivf_pq/dimpq128-cluster1024-half-float" + }, + { + "name": 
"raft_ivf_pq.dimpq512-cluster1024-float-float", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 512, + "ratio": 1, + "niter": 25 + }, + "file": "index/gist-960-euclidean/raft_ivf_pq/dimpq512-cluster1024-float-float", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/gist-960-euclidean/raft_ivf_pq/dimpq512-cluster1024-float-float" + }, + { + "name": "raft_ivf_flat.nlist1024", + "algo": "raft_ivf_flat", + "build_param": { + "nlist": 1024, + "ratio": 1, + "niter": 25 + }, + "file": "index/gist-960-euclidean/raft_ivf_flat/nlist1024", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/gist-960-euclidean/raft_ivf_flat/nlist1024" + }, + { + "name": "raft_ivf_flat.nlist16384", + "algo": "raft_ivf_flat", + "build_param": { + "nlist": 16384, + "ratio": 2, + "niter": 20 + }, + "file": "index/gist-960-euclidean/raft_ivf_flat/nlist16384", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/gist-960-euclidean/raft_ivf_flat/nlist16384" + }, + + { + "name" : "raft_cagra.dim32", + "algo" : "raft_cagra", + "build_param": { + "index_dim" : 32 + }, + "file" : "index/gist-960-euclidean/raft_cagra/dim32", + "search_params" : [ + {"itopk": 32}, + {"itopk": 64}, + {"itopk": 128} + ], + "search_result_file" : "result/gist-960-euclidean/raft_cagra/dim32" + }, + + { + "name" : "raft_cagra.dim64", + "algo" : "raft_cagra", + "build_param": { + "index_dim" : 64 + }, + "file" : "index/gist-960-euclidean/raft_cagra/dim64", + "search_params" : [ + {"itopk": 32}, + {"itopk": 64}, + {"itopk": 128} + ], + "search_result_file" : "result/gist-960-euclidean/raft_cagra/dim64" + } + ] +} diff --git a/python/raft-ann-bench/raft-ann-bench/run/conf/glove-100-angular.json b/python/raft-ann-bench/raft-ann-bench/run/conf/glove-100-angular.json new file mode 100644 index 0000000000..1d3dc09988 --- /dev/null +++ b/python/raft-ann-bench/raft-ann-bench/run/conf/glove-100-angular.json @@ -0,0 +1,1351 @@ +{ + "dataset": { + "name": "glove-100-angular", + "base_file": "data/glove-100-angular/base.fbin", + "query_file": "data/glove-100-angular/query.fbin", + "distance": "euclidean" + }, + "search_basic_param": { + "batch_size": 5000, + "k": 10, + "run_count": 3 + }, + "index": [ + { + "name" : "hnswlib.M12", + "algo" : "hnswlib", + "build_param": {"M":12, "efConstruction":500, "numThreads":32}, + "file" : "index/glove-100-angular/hnswlib/M12", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, 
"numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/glove-100-angular/hnswlib/M12" + }, + { + "name" : "hnswlib.M16", + "algo" : "hnswlib", + "build_param": {"M":16, "efConstruction":500, "numThreads":32}, + "file" : "index/glove-100-angular/hnswlib/M16", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/glove-100-angular/hnswlib/M16" + }, + { + "name" : "hnswlib.M24", + "algo" : "hnswlib", + "build_param": {"M":24, "efConstruction":500, "numThreads":32}, + "file" : "index/glove-100-angular/hnswlib/M24", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/glove-100-angular/hnswlib/M24" + }, + { + "name" : "hnswlib.M36", + "algo" : "hnswlib", + "build_param": {"M":36, "efConstruction":500, "numThreads":32}, + "file" : "index/glove-100-angular/hnswlib/M36", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/glove-100-angular/hnswlib/M36" + }, + + + + + { + "name": "raft_bfknn", + "algo": "raft_bfknn", + "build_param": {}, + "file": "index/glove-100-angular/raft_bfknn/bfknn", + "search_params": [ + { + "probe": 1 + } + ], + "search_result_file": "result/glove-100-angular/raft_bfknn/bfknn" + }, + { + "name": "faiss_ivf_flat.nlist1024", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 1024 + }, + "file": "index/glove-100-angular/faiss_ivf_flat/nlist1024", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-angular/faiss_ivf_flat/nlist1024" + }, + { + "name": "faiss_ivf_flat.nlist2048", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 2048 + }, + "file": "index/glove-100-angular/faiss_ivf_flat/nlist2048", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-angular/faiss_ivf_flat/nlist2048" + }, + { + "name": "faiss_ivf_flat.nlist4096", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 4096 + }, + "file": "index/glove-100-angular/faiss_ivf_flat/nlist4096", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": 
"result/glove-100-angular/faiss_ivf_flat/nlist4096" + }, + { + "name": "faiss_ivf_flat.nlist8192", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 8192 + }, + "file": "index/glove-100-angular/faiss_ivf_flat/nlist8192", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-angular/faiss_ivf_flat/nlist8192" + }, + { + "name": "faiss_ivf_flat.nlist16384", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 16384 + }, + "file": "index/glove-100-angular/faiss_ivf_flat/nlist16384", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/glove-100-angular/faiss_ivf_flat/nlist16384" + }, + { + "name": "faiss_ivf_pq.M64-nlist1024", + "algo": "faiss_gpu_ivf_pq", + "build_param": { + "nlist": 1024, + "M": 64, + "useFloat16": true, + "usePrecomputed": true + }, + "file": "index/glove-100-angular/faiss_ivf_pq/M64-nlist1024", + "search_params": [ + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-angular/faiss_ivf_pq/M64-nlist1024" + }, + { + "name": "faiss_ivf_pq.M64-nlist1024.noprecomp", + "algo": "faiss_gpu_ivf_pq", + "build_param": { + "nlist": 1024, + "M": 64, + "useFloat16": true, + "usePrecomputed": false + }, + "file": "index/glove-100-angular/faiss_ivf_pq/M64-nlist1024.noprecomp", + "search_params": [ + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-angular/faiss_ivf_pq/M64-nlist1024" + }, + { + "name": "faiss_ivf_sq.nlist1024-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 1024, + "quantizer_type": "fp16" + }, + "file": "index/glove-100-angular/faiss_ivf_sq/nlist1024-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-angular/faiss_ivf_sq/nlist1024-fp16" + }, + { + "name": "faiss_ivf_sq.nlist2048-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 2048, + "quantizer_type": "fp16" + }, + "file": "index/glove-100-angular/faiss_ivf_sq/nlist2048-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-angular/faiss_ivf_sq/nlist2048-fp16" + }, + { + "name": "faiss_ivf_sq.nlist4096-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 4096, + "quantizer_type": "fp16" + }, + "file": "index/glove-100-angular/faiss_ivf_sq/nlist4096-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": 
"result/glove-100-angular/faiss_ivf_sq/nlist4096-fp16" + }, + { + "name": "faiss_ivf_sq.nlist8192-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 8192, + "quantizer_type": "fp16" + }, + "file": "index/glove-100-angular/faiss_ivf_sq/nlist8192-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-angular/faiss_ivf_sq/nlist8192-fp16" + }, + { + "name": "faiss_ivf_sq.nlist16384-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 16384, + "quantizer_type": "fp16" + }, + "file": "index/glove-100-angular/faiss_ivf_sq/nlist16384-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/glove-100-angular/faiss_ivf_sq/nlist16384-fp16" + }, + { + "name": "faiss_ivf_sq.nlist1024-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 1024, + "quantizer_type": "int8" + }, + "file": "index/glove-100-angular/faiss_ivf_sq/nlist1024-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-angular/faiss_ivf_sq/nlist1024-int8" + }, + { + "name": "faiss_ivf_sq.nlist2048-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 2048, + "quantizer_type": "int8" + }, + "file": "index/glove-100-angular/faiss_ivf_sq/nlist2048-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-angular/faiss_ivf_sq/nlist2048-int8" + }, + { + "name": "faiss_ivf_sq.nlist4096-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 4096, + "quantizer_type": "int8" + }, + "file": "index/glove-100-angular/faiss_ivf_sq/nlist4096-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-angular/faiss_ivf_sq/nlist4096-int8" + }, + { + "name": "faiss_ivf_sq.nlist8192-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 8192, + "quantizer_type": "int8" + }, + "file": "index/glove-100-angular/faiss_ivf_sq/nlist8192-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-angular/faiss_ivf_sq/nlist8192-int8" + }, + { + "name": "faiss_ivf_sq.nlist16384-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 16384, + "quantizer_type": "int8" + }, + "file": "index/glove-100-angular/faiss_ivf_sq/nlist16384-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 
500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/glove-100-angular/faiss_ivf_sq/nlist16384-int8" + }, + { + "name": "faiss_flat", + "algo": "faiss_gpu_flat", + "build_param": {}, + "file": "index/glove-100-angular/faiss_flat/flat", + "search_params": [ + {} + ], + "search_result_file": "result/glove-100-angular/faiss_flat/flat" + }, + + { + "name": "raft_ivf_pq.dimpq128-cluster1024", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-100-angular/raft_ivf_pq/dimpq128-cluster1024", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/glove-100-angular/raft_ivf_pq/dimpq128-cluster1024" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-100-angular/raft_ivf_pq/dimpq128-cluster1024-float-float", + "search_params": [ + { + "k": 10, + "numProbes": 1, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 5, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/glove-100-angular/raft_ivf_pq/dimpq128-cluster1024-float-float" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-100-angular/raft_ivf_pq/dimpq128-cluster1024-float-half", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + } + ], + "search_result_file": 
"result/glove-100-angular/raft_ivf_pq/dimpq128-cluster1024-float-half" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-100-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/glove-100-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 64, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-100-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/glove-100-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 64, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-100-angular/raft_ivf_pq/dimpq64-cluster1024-float-half", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/glove-100-angular/raft_ivf_pq/dimpq64-cluster1024-float-half" + }, + { + "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 32, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-100-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": 
"fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/glove-100-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 16, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-100-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/glove-100-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-100-angular/raft_ivf_pq/dimpq128-cluster1024-half-float", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/glove-100-angular/raft_ivf_pq/dimpq128-cluster1024-half-float" + }, + { + "name": "raft_ivf_pq.dimpq512-cluster1024-float-float", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 512, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-100-angular/raft_ivf_pq/dimpq512-cluster1024-float-float", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/glove-100-angular/raft_ivf_pq/dimpq512-cluster1024-float-float" + }, + { + "name": "raft_ivf_flat.nlist1024", + "algo": "raft_ivf_flat", + "build_param": { + "nlist": 1024, + "ratio": 1, + "niter": 25 + }, + "file": 
"index/glove-100-angular/raft_ivf_flat/nlist1024", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-100-angular/raft_ivf_flat/nlist1024" + }, + { + "name": "raft_ivf_flat.nlist16384", + "algo": "raft_ivf_flat", + "build_param": { + "nlist": 16384, + "ratio": 2, + "niter": 20 + }, + "file": "index/glove-100-angular/raft_ivf_flat/nlist16384", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/glove-100-angular/raft_ivf_flat/nlist16384" + }, + + { + "name" : "raft_cagra.dim32", + "algo" : "raft_cagra", + "build_param": { + "index_dim" : 32 + }, + "file" : "index/glove-100-angular/raft_cagra/dim32", + "search_params" : [ + {"itopk": 32}, + {"itopk": 64}, + {"itopk": 128} + ], + "search_result_file" : "result/glove-100-angular/raft_cagra/dim32" + }, + + { + "name" : "raft_cagra.dim64", + "algo" : "raft_cagra", + "build_param": { + "index_dim" : 64 + }, + "file" : "index/glove-100-angular/raft_cagra/dim64", + "search_params" : [ + {"itopk": 32}, + {"itopk": 64}, + {"itopk": 128} + ], + "search_result_file" : "result/glove-100-angular/raft_cagra/dim64" + } + ] +} diff --git a/python/raft-ann-bench/raft-ann-bench/run/conf/glove-100-inner.json b/python/raft-ann-bench/raft-ann-bench/run/conf/glove-100-inner.json index 5d0bbf970c..7c95ceb439 100644 --- a/python/raft-ann-bench/raft-ann-bench/run/conf/glove-100-inner.json +++ b/python/raft-ann-bench/raft-ann-bench/run/conf/glove-100-inner.json @@ -1,24 +1,24 @@ { - "dataset" : { - "name" : "glove-100-inner", - "base_file" : "data/glove-100-inner/base.fbin", - "query_file" : "data/glove-100-inner/query.fbin", - "distance" : "inner_product" + "dataset": { + "name": "glove-100-inner", + "base_file": "glove-100-inner/base.fbin", + "query_file": "glove-100-inner/query.fbin", + "groundtruth_neighbors_file": "glove-100-inner/groundtruth.neighbors.ibin", + "distance": "inner_product" }, - "search_basic_param" : { - "batch_size" : 1, - "k" : 10, - "run_count" : 3 + "search_basic_param": { + "batch_size": 1, + "k": 10 }, - "index" : [ + "index": [ { - "name" : "hnswlib.M4", - "algo" : "hnswlib", + "name": "hnswlib.M4", + "algo": "hnswlib", "build_param": {"M":4, "efConstruction":500, "numThreads":4}, - "file" : "index/glove-100-inner/hnswlib/M4", - "search_params" : [ + "file": "glove-100-inner/hnswlib/M4", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -28,16 +28,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/glove-100-inner/hnswlib/M4" + ] }, - { - "name" : "hnswlib.M8", - "algo" : "hnswlib", + "name": "hnswlib.M8", + "algo": "hnswlib", "build_param": {"M":8, "efConstruction":500, "numThreads":4}, - "file" : "index/glove-100-inner/hnswlib/M8", - "search_params" : [ + "file": "glove-100-inner/hnswlib/M8", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -47,16 +45,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/glove-100-inner/hnswlib/M8" + ] }, - { - "name" : 
"hnswlib.M12", - "algo" : "hnswlib", + "name": "hnswlib.M12", + "algo": "hnswlib", "build_param": {"M":12, "efConstruction":500, "numThreads":4}, - "file" : "index/glove-100-inner/hnswlib/M12", - "search_params" : [ + "file": "glove-100-inner/hnswlib/M12", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -66,16 +62,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/glove-100-inner/hnswlib/M12" + ] }, - { - "name" : "hnswlib.M16", - "algo" : "hnswlib", + "name": "hnswlib.M16", + "algo": "hnswlib", "build_param": {"M":16, "efConstruction":500, "numThreads":4}, - "file" : "index/glove-100-inner/hnswlib/M16", - "search_params" : [ + "file": "glove-100-inner/hnswlib/M16", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -85,16 +79,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/glove-100-inner/hnswlib/M16" + ] }, - { - "name" : "hnswlib.M24", - "algo" : "hnswlib", + "name": "hnswlib.M24", + "algo": "hnswlib", "build_param": {"M":24, "efConstruction":500, "numThreads":4}, - "file" : "index/glove-100-inner/hnswlib/M24", - "search_params" : [ + "file": "glove-100-inner/hnswlib/M24", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -104,16 +96,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/glove-100-inner/hnswlib/M24" + ] }, - { - "name" : "hnswlib.M36", - "algo" : "hnswlib", + "name": "hnswlib.M36", + "algo": "hnswlib", "build_param": {"M":36, "efConstruction":500, "numThreads":4}, - "file" : "index/glove-100-inner/hnswlib/M36", - "search_params" : [ + "file": "glove-100-inner/hnswlib/M36", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -123,16 +113,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/glove-100-inner/hnswlib/M36" + ] }, - { - "name" : "hnswlib.M48", - "algo" : "hnswlib", + "name": "hnswlib.M48", + "algo": "hnswlib", "build_param": {"M":48, "efConstruction":500, "numThreads":4}, - "file" : "index/glove-100-inner/hnswlib/M48", - "search_params" : [ + "file": "glove-100-inner/hnswlib/M48", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -142,16 +130,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/glove-100-inner/hnswlib/M48" + ] }, - { - "name" : "hnswlib.M64", - "algo" : "hnswlib", + "name": "hnswlib.M64", + "algo": "hnswlib", "build_param": {"M":64, "efConstruction":500, "numThreads":4}, - "file" : "index/glove-100-inner/hnswlib/M64", - "search_params" : [ + "file": "glove-100-inner/hnswlib/M64", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -161,16 +147,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/glove-100-inner/hnswlib/M64" + ] }, - { - "name" : "hnswlib.M96", - "algo" : "hnswlib", + "name": "hnswlib.M96", + "algo": "hnswlib", "build_param": {"M":96, "efConstruction":500, "numThreads":4}, - "file" : "index/glove-100-inner/hnswlib/M96", - "search_params" : [ + "file": 
"glove-100-inner/hnswlib/M96", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -180,16 +164,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/glove-100-inner/hnswlib/M96" + ] }, - { - "name" : "faiss_ivf_flat.nlist1024", - "algo" : "faiss_gpu_ivf_flat", + "name": "faiss_ivf_flat.nlist1024", + "algo": "faiss_gpu_ivf_flat", "build_param": {"nlist":1024}, - "file" : "index/glove-100-inner/faiss_ivf_flat/nlist1024", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_flat/nlist1024", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -198,16 +180,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_flat/nlist1024" + ] }, - { - "name" : "faiss_ivf_flat.nlist2048", - "algo" : "faiss_gpu_ivf_flat", + "name": "faiss_ivf_flat.nlist2048", + "algo": "faiss_gpu_ivf_flat", "build_param": {"nlist":2048}, - "file" : "index/glove-100-inner/faiss_ivf_flat/nlist2048", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_flat/nlist2048", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -216,16 +196,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_flat/nlist2048" + ] }, - { - "name" : "faiss_ivf_flat.nlist4096", - "algo" : "faiss_gpu_ivf_flat", + "name": "faiss_ivf_flat.nlist4096", + "algo": "faiss_gpu_ivf_flat", "build_param": {"nlist":4096}, - "file" : "index/glove-100-inner/faiss_ivf_flat/nlist4096", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_flat/nlist4096", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -234,16 +212,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_flat/nlist4096" + ] }, - { - "name" : "faiss_ivf_flat.nlist8192", - "algo" : "faiss_gpu_ivf_flat", + "name": "faiss_ivf_flat.nlist8192", + "algo": "faiss_gpu_ivf_flat", "build_param": {"nlist":8192}, - "file" : "index/glove-100-inner/faiss_ivf_flat/nlist8192", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_flat/nlist8192", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -252,16 +228,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_flat/nlist8192" + ] }, - { - "name" : "faiss_ivf_flat.nlist16384", - "algo" : "faiss_gpu_ivf_flat", + "name": "faiss_ivf_flat.nlist16384", + "algo": "faiss_gpu_ivf_flat", "build_param": {"nlist":16384}, - "file" : "index/glove-100-inner/faiss_ivf_flat/nlist16384", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_flat/nlist16384", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -271,18 +245,17 @@ {"nprobe":500}, {"nprobe":1000}, {"nprobe":2000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_flat/nlist16384" + ] }, { - "name" : "faiss_ivf_pq.M2-nlist1024", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M2-nlist1024", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":1024, "M":2}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M2-nlist1024", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M2-nlist1024", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -291,16 +264,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M2-nlist1024" + ] }, - { - "name" : 
"faiss_ivf_pq.M2-nlist2048", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M2-nlist2048", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":2048, "M":2}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M2-nlist2048", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M2-nlist2048", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -309,16 +280,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M2-nlist2048" + ] }, - { - "name" : "faiss_ivf_pq.M2-nlist4096", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M2-nlist4096", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":4096, "M":2}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M2-nlist4096", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M2-nlist4096", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -327,16 +296,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M2-nlist4096" + ] }, - { - "name" : "faiss_ivf_pq.M2-nlist8192", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M2-nlist8192", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":8192, "M":2}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M2-nlist8192", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M2-nlist8192", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -345,16 +312,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M2-nlist8192" + ] }, - { - "name" : "faiss_ivf_pq.M2-nlist16384", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M2-nlist16384", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":16384, "M":2}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M2-nlist16384", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M2-nlist16384", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -364,16 +329,14 @@ {"nprobe":500}, {"nprobe":1000}, {"nprobe":2000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M2-nlist16384" + ] }, - { - "name" : "faiss_ivf_pq.M4-nlist1024", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M4-nlist1024", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":1024, "M":4}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M4-nlist1024", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M4-nlist1024", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -382,16 +345,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M4-nlist1024" + ] }, - { - "name" : "faiss_ivf_pq.M4-nlist2048", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M4-nlist2048", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":2048, "M":4}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M4-nlist2048", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M4-nlist2048", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -400,16 +361,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M4-nlist2048" + ] }, - { - "name" : "faiss_ivf_pq.M4-nlist4096", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M4-nlist4096", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":4096, "M":4}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M4-nlist4096", - "search_params" : [ + "file": 
"glove-100-inner/faiss_ivf_pq/M4-nlist4096", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -418,16 +377,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M4-nlist4096" + ] }, - { - "name" : "faiss_ivf_pq.M4-nlist8192", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M4-nlist8192", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":8192, "M":4}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M4-nlist8192", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M4-nlist8192", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -436,16 +393,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M4-nlist8192" + ] }, - { - "name" : "faiss_ivf_pq.M4-nlist16384", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M4-nlist16384", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":16384, "M":4}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M4-nlist16384", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M4-nlist16384", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -455,16 +410,14 @@ {"nprobe":500}, {"nprobe":1000}, {"nprobe":2000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M4-nlist16384" + ] }, - { - "name" : "faiss_ivf_pq.M20-nlist1024", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M20-nlist1024", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":1024, "M":20}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M20-nlist1024", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M20-nlist1024", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -473,16 +426,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M20-nlist1024" + ] }, - { - "name" : "faiss_ivf_pq.M20-nlist2048", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M20-nlist2048", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":2048, "M":20}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M20-nlist2048", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M20-nlist2048", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -491,16 +442,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M20-nlist2048" + ] }, - { - "name" : "faiss_ivf_pq.M20-nlist4096", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M20-nlist4096", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":4096, "M":20}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M20-nlist4096", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M20-nlist4096", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -509,16 +458,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M20-nlist4096" + ] }, - { - "name" : "faiss_ivf_pq.M20-nlist8192", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M20-nlist8192", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":8192, "M":20}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M20-nlist8192", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M20-nlist8192", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -527,16 +474,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M20-nlist8192" + ] 
}, - { - "name" : "faiss_ivf_pq.M20-nlist16384", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M20-nlist16384", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":16384, "M":20}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M20-nlist16384", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M20-nlist16384", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -546,17 +491,16 @@ {"nprobe":500}, {"nprobe":1000}, {"nprobe":2000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M20-nlist16384" + ] }, { - "name" : "faiss_ivf_sq.nlist1024-fp16", - "algo" : "faiss_gpu_ivf_sq", + "name": "faiss_ivf_sq.nlist1024-fp16", + "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist":1024, "quantizer_type":"fp16"}, - "file" : "index/glove-100-inner/faiss_ivf_sq/nlist1024-fp16", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_sq/nlist1024-fp16", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -565,16 +509,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist1024-fp16" + ] }, - { - "name" : "faiss_ivf_sq.nlist2048-fp16", - "algo" : "faiss_gpu_ivf_sq", + "name": "faiss_ivf_sq.nlist2048-fp16", + "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist":2048, "quantizer_type":"fp16"}, - "file" : "index/glove-100-inner/faiss_ivf_sq/nlist2048-fp16", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_sq/nlist2048-fp16", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -583,16 +525,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist2048-fp16" + ] }, - { - "name" : "faiss_ivf_sq.nlist4096-fp16", - "algo" : "faiss_gpu_ivf_sq", + "name": "faiss_ivf_sq.nlist4096-fp16", + "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist":4096, "quantizer_type":"fp16"}, - "file" : "index/glove-100-inner/faiss_ivf_sq/nlist4096-fp16", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_sq/nlist4096-fp16", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -601,16 +541,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist4096-fp16" + ] }, - { - "name" : "faiss_ivf_sq.nlist8192-fp16", - "algo" : "faiss_gpu_ivf_sq", + "name": "faiss_ivf_sq.nlist8192-fp16", + "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist":8192, "quantizer_type":"fp16"}, - "file" : "index/glove-100-inner/faiss_ivf_sq/nlist8192-fp16", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_sq/nlist8192-fp16", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -619,16 +557,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist8192-fp16" + ] }, - { - "name" : "faiss_ivf_sq.nlist16384-fp16", - "algo" : "faiss_gpu_ivf_sq", + "name": "faiss_ivf_sq.nlist16384-fp16", + "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist":16384, "quantizer_type":"fp16"}, - "file" : "index/glove-100-inner/faiss_ivf_sq/nlist16384-fp16", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_sq/nlist16384-fp16", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -638,17 +574,14 @@ {"nprobe":500}, {"nprobe":1000}, {"nprobe":2000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist16384-fp16" + ] }, - - { - "name" : "faiss_ivf_sq.nlist1024-int8", - "algo" : "faiss_gpu_ivf_sq", + "name": "faiss_ivf_sq.nlist1024-int8", + "algo": 
"faiss_gpu_ivf_sq", "build_param": {"nlist":1024, "quantizer_type":"int8"}, - "file" : "index/glove-100-inner/faiss_ivf_sq/nlist1024-int8", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_sq/nlist1024-int8", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -657,16 +590,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist1024-int8" + ] }, - { - "name" : "faiss_ivf_sq.nlist2048-int8", - "algo" : "faiss_gpu_ivf_sq", + "name": "faiss_ivf_sq.nlist2048-int8", + "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist":2048, "quantizer_type":"int8"}, - "file" : "index/glove-100-inner/faiss_ivf_sq/nlist2048-int8", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_sq/nlist2048-int8", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -675,16 +606,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist2048-int8" + ] }, - { - "name" : "faiss_ivf_sq.nlist4096-int8", - "algo" : "faiss_gpu_ivf_sq", + "name": "faiss_ivf_sq.nlist4096-int8", + "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist":4096, "quantizer_type":"int8"}, - "file" : "index/glove-100-inner/faiss_ivf_sq/nlist4096-int8", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_sq/nlist4096-int8", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -693,16 +622,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist4096-int8" + ] }, - { - "name" : "faiss_ivf_sq.nlist8192-int8", - "algo" : "faiss_gpu_ivf_sq", + "name": "faiss_ivf_sq.nlist8192-int8", + "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist":8192, "quantizer_type":"int8"}, - "file" : "index/glove-100-inner/faiss_ivf_sq/nlist8192-int8", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_sq/nlist8192-int8", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -711,16 +638,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist8192-int8" + ] }, - { - "name" : "faiss_ivf_sq.nlist16384-int8", - "algo" : "faiss_gpu_ivf_sq", + "name": "faiss_ivf_sq.nlist16384-int8", + "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist":16384, "quantizer_type":"int8"}, - "file" : "index/glove-100-inner/faiss_ivf_sq/nlist16384-int8", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_sq/nlist16384-int8", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -730,22 +655,18 @@ {"nprobe":500}, {"nprobe":1000}, {"nprobe":2000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist16384-int8" + ] }, - { - "name" : "faiss_flat", - "algo" : "faiss_gpu_flat", + "name": "faiss_flat", + "algo": "faiss_gpu_flat", "build_param": {}, - "file" : "index/glove-100-inner/faiss_flat/flat", - "search_params" : [{}], - "search_result_file" : "result/glove-100-inner/faiss_flat/flat" + "file": "glove-100-inner/faiss_flat/flat", + "search_params": [{}] }, - { - "name" : "ggnn.kbuild96-segment64-refine2-k10", - "algo" : "ggnn", + "name": "ggnn.kbuild96-segment64-refine2-k10", + "algo": "ggnn", "build_param": { "k_build": 96, "segment_size": 64, @@ -753,8 +674,8 @@ "dataset_size": 1183514, "k": 10 }, - "file" : "index/glove-100-inner/ggnn/kbuild96-segment64-refine2-k10", - "search_params" : [ + "file": "glove-100-inner/ggnn/kbuild96-segment64-refine2-k10", + "search_params": [ {"tau":0.001, "block_dim":64, 
"sorted_size":32}, {"tau":0.005, "block_dim":64, "sorted_size":32}, {"tau":0.01, "block_dim":64, "sorted_size":32}, @@ -786,8 +707,7 @@ {"tau":0.3, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, {"tau":0.4, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, {"tau":0.5, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32} - - ], - "search_result_file" : "result/glove-100-inner/ggnn/kbuild96-segment64-refine2-k10" - }] + ] + } + ] } diff --git a/python/raft-ann-bench/raft-ann-bench/run/conf/glove-50-angular.json b/python/raft-ann-bench/raft-ann-bench/run/conf/glove-50-angular.json new file mode 100644 index 0000000000..3e78c11814 --- /dev/null +++ b/python/raft-ann-bench/raft-ann-bench/run/conf/glove-50-angular.json @@ -0,0 +1,1351 @@ +{ + "dataset": { + "name": "glove-50-angular", + "base_file": "data/glove-50-angular/base.fbin", + "query_file": "data/glove-50-angular/query.fbin", + "distance": "euclidean" + }, + "search_basic_param": { + "batch_size": 5000, + "k": 10, + "run_count": 3 + }, + "index": [ + { + "name" : "hnswlib.M12", + "algo" : "hnswlib", + "build_param": {"M":12, "efConstruction":500, "numThreads":32}, + "file" : "index/glove-50-angular/hnswlib/M12", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/glove-50-angular/hnswlib/M12" + }, + { + "name" : "hnswlib.M16", + "algo" : "hnswlib", + "build_param": {"M":16, "efConstruction":500, "numThreads":32}, + "file" : "index/glove-50-angular/hnswlib/M16", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/glove-50-angular/hnswlib/M16" + }, + { + "name" : "hnswlib.M24", + "algo" : "hnswlib", + "build_param": {"M":24, "efConstruction":500, "numThreads":32}, + "file" : "index/glove-50-angular/hnswlib/M24", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/glove-50-angular/hnswlib/M24" + }, + { + "name" : "hnswlib.M36", + "algo" : "hnswlib", + "build_param": {"M":36, "efConstruction":500, "numThreads":32}, + "file" : "index/glove-50-angular/hnswlib/M36", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/glove-50-angular/hnswlib/M36" + }, + + + + + { + "name": "raft_bfknn", + "algo": "raft_bfknn", + "build_param": {}, + "file": "index/glove-50-angular/raft_bfknn/bfknn", + "search_params": [ + { + "probe": 1 + } + ], + "search_result_file": 
"result/glove-50-angular/raft_bfknn/bfknn" + }, + { + "name": "faiss_ivf_flat.nlist1024", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 1024 + }, + "file": "index/glove-50-angular/faiss_ivf_flat/nlist1024", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-angular/faiss_ivf_flat/nlist1024" + }, + { + "name": "faiss_ivf_flat.nlist2048", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 2048 + }, + "file": "index/glove-50-angular/faiss_ivf_flat/nlist2048", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-angular/faiss_ivf_flat/nlist2048" + }, + { + "name": "faiss_ivf_flat.nlist4096", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 4096 + }, + "file": "index/glove-50-angular/faiss_ivf_flat/nlist4096", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-angular/faiss_ivf_flat/nlist4096" + }, + { + "name": "faiss_ivf_flat.nlist8192", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 8192 + }, + "file": "index/glove-50-angular/faiss_ivf_flat/nlist8192", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-angular/faiss_ivf_flat/nlist8192" + }, + { + "name": "faiss_ivf_flat.nlist16384", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 16384 + }, + "file": "index/glove-50-angular/faiss_ivf_flat/nlist16384", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/glove-50-angular/faiss_ivf_flat/nlist16384" + }, + { + "name": "faiss_ivf_pq.M64-nlist1024", + "algo": "faiss_gpu_ivf_pq", + "build_param": { + "nlist": 1024, + "M": 64, + "useFloat16": true, + "usePrecomputed": true + }, + "file": "index/glove-50-angular/faiss_ivf_pq/M64-nlist1024", + "search_params": [ + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-angular/faiss_ivf_pq/M64-nlist1024" + }, + { + "name": "faiss_ivf_pq.M64-nlist1024.noprecomp", + "algo": "faiss_gpu_ivf_pq", + "build_param": { + "nlist": 1024, + "M": 64, + "useFloat16": true, + "usePrecomputed": false + }, + "file": "index/glove-50-angular/faiss_ivf_pq/M64-nlist1024.noprecomp", + "search_params": [ + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-angular/faiss_ivf_pq/M64-nlist1024" + }, + { + "name": "faiss_ivf_sq.nlist1024-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + 
"nlist": 1024, + "quantizer_type": "fp16" + }, + "file": "index/glove-50-angular/faiss_ivf_sq/nlist1024-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-angular/faiss_ivf_sq/nlist1024-fp16" + }, + { + "name": "faiss_ivf_sq.nlist2048-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 2048, + "quantizer_type": "fp16" + }, + "file": "index/glove-50-angular/faiss_ivf_sq/nlist2048-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-angular/faiss_ivf_sq/nlist2048-fp16" + }, + { + "name": "faiss_ivf_sq.nlist4096-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 4096, + "quantizer_type": "fp16" + }, + "file": "index/glove-50-angular/faiss_ivf_sq/nlist4096-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-angular/faiss_ivf_sq/nlist4096-fp16" + }, + { + "name": "faiss_ivf_sq.nlist8192-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 8192, + "quantizer_type": "fp16" + }, + "file": "index/glove-50-angular/faiss_ivf_sq/nlist8192-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-angular/faiss_ivf_sq/nlist8192-fp16" + }, + { + "name": "faiss_ivf_sq.nlist16384-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 16384, + "quantizer_type": "fp16" + }, + "file": "index/glove-50-angular/faiss_ivf_sq/nlist16384-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/glove-50-angular/faiss_ivf_sq/nlist16384-fp16" + }, + { + "name": "faiss_ivf_sq.nlist1024-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 1024, + "quantizer_type": "int8" + }, + "file": "index/glove-50-angular/faiss_ivf_sq/nlist1024-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-angular/faiss_ivf_sq/nlist1024-int8" + }, + { + "name": "faiss_ivf_sq.nlist2048-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 2048, + "quantizer_type": "int8" + }, + "file": "index/glove-50-angular/faiss_ivf_sq/nlist2048-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-angular/faiss_ivf_sq/nlist2048-int8" + }, + { + "name": "faiss_ivf_sq.nlist4096-int8", + 
"algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 4096, + "quantizer_type": "int8" + }, + "file": "index/glove-50-angular/faiss_ivf_sq/nlist4096-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-angular/faiss_ivf_sq/nlist4096-int8" + }, + { + "name": "faiss_ivf_sq.nlist8192-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 8192, + "quantizer_type": "int8" + }, + "file": "index/glove-50-angular/faiss_ivf_sq/nlist8192-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-angular/faiss_ivf_sq/nlist8192-int8" + }, + { + "name": "faiss_ivf_sq.nlist16384-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 16384, + "quantizer_type": "int8" + }, + "file": "index/glove-50-angular/faiss_ivf_sq/nlist16384-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/glove-50-angular/faiss_ivf_sq/nlist16384-int8" + }, + { + "name": "faiss_flat", + "algo": "faiss_gpu_flat", + "build_param": {}, + "file": "index/glove-50-angular/faiss_flat/flat", + "search_params": [ + {} + ], + "search_result_file": "result/glove-50-angular/faiss_flat/flat" + }, + + { + "name": "raft_ivf_pq.dimpq128-cluster1024", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-50-angular/raft_ivf_pq/dimpq128-cluster1024", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/glove-50-angular/raft_ivf_pq/dimpq128-cluster1024" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-50-angular/raft_ivf_pq/dimpq128-cluster1024-float-float", + "search_params": [ + { + "k": 10, + "numProbes": 1, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 5, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": 
"float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/glove-50-angular/raft_ivf_pq/dimpq128-cluster1024-float-float" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-50-angular/raft_ivf_pq/dimpq128-cluster1024-float-half", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/glove-50-angular/raft_ivf_pq/dimpq128-cluster1024-float-half" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-50-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/glove-50-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 64, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-50-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/glove-50-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 64, + "ratio": 1, + "niter": 25 + }, + "file": 
"index/glove-50-angular/raft_ivf_pq/dimpq64-cluster1024-float-half", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/glove-50-angular/raft_ivf_pq/dimpq64-cluster1024-float-half" + }, + { + "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 32, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-50-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/glove-50-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 16, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-50-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/glove-50-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-50-angular/raft_ivf_pq/dimpq128-cluster1024-half-float", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + 
"internalDistanceDtype": "half", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/glove-50-angular/raft_ivf_pq/dimpq128-cluster1024-half-float" + }, + { + "name": "raft_ivf_pq.dimpq512-cluster1024-float-float", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 512, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-50-angular/raft_ivf_pq/dimpq512-cluster1024-float-float", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/glove-50-angular/raft_ivf_pq/dimpq512-cluster1024-float-float" + }, + { + "name": "raft_ivf_flat.nlist1024", + "algo": "raft_ivf_flat", + "build_param": { + "nlist": 1024, + "ratio": 1, + "niter": 25 + }, + "file": "index/glove-50-angular/raft_ivf_flat/nlist1024", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/glove-50-angular/raft_ivf_flat/nlist1024" + }, + { + "name": "raft_ivf_flat.nlist16384", + "algo": "raft_ivf_flat", + "build_param": { + "nlist": 16384, + "ratio": 2, + "niter": 20 + }, + "file": "index/glove-50-angular/raft_ivf_flat/nlist16384", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/glove-50-angular/raft_ivf_flat/nlist16384" + }, + + { + "name" : "raft_cagra.dim32", + "algo" : "raft_cagra", + "build_param": { + "index_dim" : 32 + }, + "file" : "index/glove-50-angular/raft_cagra/dim32", + "search_params" : [ + {"itopk": 32}, + {"itopk": 64}, + {"itopk": 128} + ], + "search_result_file" : "result/glove-50-angular/raft_cagra/dim32" + }, + + { + "name" : "raft_cagra.dim64", + "algo" : "raft_cagra", + "build_param": { + "index_dim" : 64 + }, + "file" : "index/glove-50-angular/raft_cagra/dim64", + "search_params" : [ + {"itopk": 32}, + {"itopk": 64}, + {"itopk": 128} + ], + "search_result_file" : "result/glove-50-angular/raft_cagra/dim64" + } + ] +} diff --git a/python/raft-ann-bench/raft-ann-bench/run/conf/lastfm-65-angular.json b/python/raft-ann-bench/raft-ann-bench/run/conf/lastfm-65-angular.json new file mode 100644 index 0000000000..62f8878bd6 --- /dev/null +++ b/python/raft-ann-bench/raft-ann-bench/run/conf/lastfm-65-angular.json @@ -0,0 +1,1351 @@ +{ + "dataset": { + "name": "lastfm-65-angular", + "base_file": "data/lastfm-65-angular/base.fbin", + "query_file": "data/lastfm-65-angular/query.fbin", + "distance": "euclidean" + }, + "search_basic_param": { + "batch_size": 5000, + "k": 10, + "run_count": 3 + }, + "index": [ + { + "name" : "hnswlib.M12", + "algo" : "hnswlib", + "build_param": {"M":12, "efConstruction":500, "numThreads":32}, + "file" : 
"index/lastfm-65-angular/hnswlib/M12", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/lastfm-65-angular/hnswlib/M12" + }, + { + "name" : "hnswlib.M16", + "algo" : "hnswlib", + "build_param": {"M":16, "efConstruction":500, "numThreads":32}, + "file" : "index/lastfm-65-angular/hnswlib/M16", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/lastfm-65-angular/hnswlib/M16" + }, + { + "name" : "hnswlib.M24", + "algo" : "hnswlib", + "build_param": {"M":24, "efConstruction":500, "numThreads":32}, + "file" : "index/lastfm-65-angular/hnswlib/M24", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/lastfm-65-angular/hnswlib/M24" + }, + { + "name" : "hnswlib.M36", + "algo" : "hnswlib", + "build_param": {"M":36, "efConstruction":500, "numThreads":32}, + "file" : "index/lastfm-65-angular/hnswlib/M36", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/lastfm-65-angular/hnswlib/M36" + }, + + + + + { + "name": "raft_bfknn", + "algo": "raft_bfknn", + "build_param": {}, + "file": "index/lastfm-65-angular/raft_bfknn/bfknn", + "search_params": [ + { + "probe": 1 + } + ], + "search_result_file": "result/lastfm-65-angular/raft_bfknn/bfknn" + }, + { + "name": "faiss_ivf_flat.nlist1024", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 1024 + }, + "file": "index/lastfm-65-angular/faiss_ivf_flat/nlist1024", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/lastfm-65-angular/faiss_ivf_flat/nlist1024" + }, + { + "name": "faiss_ivf_flat.nlist2048", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 2048 + }, + "file": "index/lastfm-65-angular/faiss_ivf_flat/nlist2048", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/lastfm-65-angular/faiss_ivf_flat/nlist2048" + }, + { + "name": "faiss_ivf_flat.nlist4096", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 4096 + }, + "file": "index/lastfm-65-angular/faiss_ivf_flat/nlist4096", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, 
+ { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/lastfm-65-angular/faiss_ivf_flat/nlist4096" + }, + { + "name": "faiss_ivf_flat.nlist8192", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 8192 + }, + "file": "index/lastfm-65-angular/faiss_ivf_flat/nlist8192", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/lastfm-65-angular/faiss_ivf_flat/nlist8192" + }, + { + "name": "faiss_ivf_flat.nlist16384", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 16384 + }, + "file": "index/lastfm-65-angular/faiss_ivf_flat/nlist16384", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/lastfm-65-angular/faiss_ivf_flat/nlist16384" + }, + { + "name": "faiss_ivf_pq.M64-nlist1024", + "algo": "faiss_gpu_ivf_pq", + "build_param": { + "nlist": 1024, + "M": 64, + "useFloat16": true, + "usePrecomputed": true + }, + "file": "index/lastfm-65-angular/faiss_ivf_pq/M64-nlist1024", + "search_params": [ + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/lastfm-65-angular/faiss_ivf_pq/M64-nlist1024" + }, + { + "name": "faiss_ivf_pq.M64-nlist1024.noprecomp", + "algo": "faiss_gpu_ivf_pq", + "build_param": { + "nlist": 1024, + "M": 64, + "useFloat16": true, + "usePrecomputed": false + }, + "file": "index/lastfm-65-angular/faiss_ivf_pq/M64-nlist1024.noprecomp", + "search_params": [ + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/lastfm-65-angular/faiss_ivf_pq/M64-nlist1024" + }, + { + "name": "faiss_ivf_sq.nlist1024-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 1024, + "quantizer_type": "fp16" + }, + "file": "index/lastfm-65-angular/faiss_ivf_sq/nlist1024-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/lastfm-65-angular/faiss_ivf_sq/nlist1024-fp16" + }, + { + "name": "faiss_ivf_sq.nlist2048-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 2048, + "quantizer_type": "fp16" + }, + "file": "index/lastfm-65-angular/faiss_ivf_sq/nlist2048-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/lastfm-65-angular/faiss_ivf_sq/nlist2048-fp16" + }, + { + "name": "faiss_ivf_sq.nlist4096-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 4096, + "quantizer_type": "fp16" + }, + "file": "index/lastfm-65-angular/faiss_ivf_sq/nlist4096-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { 
+ "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/lastfm-65-angular/faiss_ivf_sq/nlist4096-fp16" + }, + { + "name": "faiss_ivf_sq.nlist8192-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 8192, + "quantizer_type": "fp16" + }, + "file": "index/lastfm-65-angular/faiss_ivf_sq/nlist8192-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/lastfm-65-angular/faiss_ivf_sq/nlist8192-fp16" + }, + { + "name": "faiss_ivf_sq.nlist16384-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 16384, + "quantizer_type": "fp16" + }, + "file": "index/lastfm-65-angular/faiss_ivf_sq/nlist16384-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/lastfm-65-angular/faiss_ivf_sq/nlist16384-fp16" + }, + { + "name": "faiss_ivf_sq.nlist1024-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 1024, + "quantizer_type": "int8" + }, + "file": "index/lastfm-65-angular/faiss_ivf_sq/nlist1024-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/lastfm-65-angular/faiss_ivf_sq/nlist1024-int8" + }, + { + "name": "faiss_ivf_sq.nlist2048-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 2048, + "quantizer_type": "int8" + }, + "file": "index/lastfm-65-angular/faiss_ivf_sq/nlist2048-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/lastfm-65-angular/faiss_ivf_sq/nlist2048-int8" + }, + { + "name": "faiss_ivf_sq.nlist4096-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 4096, + "quantizer_type": "int8" + }, + "file": "index/lastfm-65-angular/faiss_ivf_sq/nlist4096-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/lastfm-65-angular/faiss_ivf_sq/nlist4096-int8" + }, + { + "name": "faiss_ivf_sq.nlist8192-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 8192, + "quantizer_type": "int8" + }, + "file": "index/lastfm-65-angular/faiss_ivf_sq/nlist8192-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/lastfm-65-angular/faiss_ivf_sq/nlist8192-int8" + }, + { + "name": "faiss_ivf_sq.nlist16384-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 16384, + "quantizer_type": "int8" + }, + "file": "index/lastfm-65-angular/faiss_ivf_sq/nlist16384-int8", + "search_params": [ + { + 
"nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/lastfm-65-angular/faiss_ivf_sq/nlist16384-int8" + }, + { + "name": "faiss_flat", + "algo": "faiss_gpu_flat", + "build_param": {}, + "file": "index/lastfm-65-angular/faiss_flat/flat", + "search_params": [ + {} + ], + "search_result_file": "result/lastfm-65-angular/faiss_flat/flat" + }, + + { + "name": "raft_ivf_pq.dimpq128-cluster1024", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/lastfm-65-angular/raft_ivf_pq/dimpq128-cluster1024", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/lastfm-65-angular/raft_ivf_pq/dimpq128-cluster1024" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/lastfm-65-angular/raft_ivf_pq/dimpq128-cluster1024-float-float", + "search_params": [ + { + "k": 10, + "numProbes": 1, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 5, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/lastfm-65-angular/raft_ivf_pq/dimpq128-cluster1024-float-float" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/lastfm-65-angular/raft_ivf_pq/dimpq128-cluster1024-float-half", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + 
{ + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/lastfm-65-angular/raft_ivf_pq/dimpq128-cluster1024-float-half" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/lastfm-65-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/lastfm-65-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 64, + "ratio": 1, + "niter": 25 + }, + "file": "index/lastfm-65-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/lastfm-65-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 64, + "ratio": 1, + "niter": 25 + }, + "file": "index/lastfm-65-angular/raft_ivf_pq/dimpq64-cluster1024-float-half", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/lastfm-65-angular/raft_ivf_pq/dimpq64-cluster1024-float-half" + }, + { + "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 32, + "ratio": 1, + "niter": 25 + }, + "file": "index/lastfm-65-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + 
"internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/lastfm-65-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 16, + "ratio": 1, + "niter": 25 + }, + "file": "index/lastfm-65-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/lastfm-65-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/lastfm-65-angular/raft_ivf_pq/dimpq128-cluster1024-half-float", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/lastfm-65-angular/raft_ivf_pq/dimpq128-cluster1024-half-float" + }, + { + "name": "raft_ivf_pq.dimpq512-cluster1024-float-float", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 512, + "ratio": 1, + "niter": 25 + }, + "file": "index/lastfm-65-angular/raft_ivf_pq/dimpq512-cluster1024-float-float", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/lastfm-65-angular/raft_ivf_pq/dimpq512-cluster1024-float-float" + }, + { + "name": "raft_ivf_flat.nlist1024", + 
"algo": "raft_ivf_flat", + "build_param": { + "nlist": 1024, + "ratio": 1, + "niter": 25 + }, + "file": "index/lastfm-65-angular/raft_ivf_flat/nlist1024", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/lastfm-65-angular/raft_ivf_flat/nlist1024" + }, + { + "name": "raft_ivf_flat.nlist16384", + "algo": "raft_ivf_flat", + "build_param": { + "nlist": 16384, + "ratio": 2, + "niter": 20 + }, + "file": "index/lastfm-65-angular/raft_ivf_flat/nlist16384", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/lastfm-65-angular/raft_ivf_flat/nlist16384" + }, + + { + "name" : "raft_cagra.dim32", + "algo" : "raft_cagra", + "build_param": { + "index_dim" : 32 + }, + "file" : "index/lastfm-65-angular/raft_cagra/dim32", + "search_params" : [ + {"itopk": 32}, + {"itopk": 64}, + {"itopk": 128} + ], + "search_result_file" : "result/lastfm-65-angular/raft_cagra/dim32" + }, + + { + "name" : "raft_cagra.dim64", + "algo" : "raft_cagra", + "build_param": { + "index_dim" : 64 + }, + "file" : "index/lastfm-65-angular/raft_cagra/dim64", + "search_params" : [ + {"itopk": 32}, + {"itopk": 64}, + {"itopk": 128} + ], + "search_result_file" : "result/lastfm-65-angular/raft_cagra/dim64" + } + ] +} diff --git a/python/raft-ann-bench/raft-ann-bench/run/conf/mnist-784-euclidean.json b/python/raft-ann-bench/raft-ann-bench/run/conf/mnist-784-euclidean.json new file mode 100644 index 0000000000..30e39a841c --- /dev/null +++ b/python/raft-ann-bench/raft-ann-bench/run/conf/mnist-784-euclidean.json @@ -0,0 +1,1351 @@ +{ + "dataset": { + "name": "mnist-784-euclidean", + "base_file": "data/mnist-784-euclidean/base.fbin", + "query_file": "data/mnist-784-euclidean/query.fbin", + "distance": "euclidean" + }, + "search_basic_param": { + "batch_size": 5000, + "k": 10, + "run_count": 3 + }, + "index": [ + { + "name" : "hnswlib.M12", + "algo" : "hnswlib", + "build_param": {"M":12, "efConstruction":500, "numThreads":32}, + "file" : "index/mnist-784-euclidean/hnswlib/M12", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/mnist-784-euclidean/hnswlib/M12" + }, + { + "name" : "hnswlib.M16", + "algo" : "hnswlib", + "build_param": {"M":16, "efConstruction":500, "numThreads":32}, + "file" : "index/mnist-784-euclidean/hnswlib/M16", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/mnist-784-euclidean/hnswlib/M16" + }, + { + "name" : "hnswlib.M24", + "algo" : "hnswlib", + "build_param": {"M":24, "efConstruction":500, "numThreads":32}, + "file" : "index/mnist-784-euclidean/hnswlib/M24", + "search_params" : [ + {"ef":10, 
"numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/mnist-784-euclidean/hnswlib/M24" + }, + { + "name" : "hnswlib.M36", + "algo" : "hnswlib", + "build_param": {"M":36, "efConstruction":500, "numThreads":32}, + "file" : "index/mnist-784-euclidean/hnswlib/M36", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/mnist-784-euclidean/hnswlib/M36" + }, + + + + + { + "name": "raft_bfknn", + "algo": "raft_bfknn", + "build_param": {}, + "file": "index/mnist-784-euclidean/raft_bfknn/bfknn", + "search_params": [ + { + "probe": 1 + } + ], + "search_result_file": "result/mnist-784-euclidean/raft_bfknn/bfknn" + }, + { + "name": "faiss_ivf_flat.nlist1024", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 1024 + }, + "file": "index/mnist-784-euclidean/faiss_ivf_flat/nlist1024", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/mnist-784-euclidean/faiss_ivf_flat/nlist1024" + }, + { + "name": "faiss_ivf_flat.nlist2048", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 2048 + }, + "file": "index/mnist-784-euclidean/faiss_ivf_flat/nlist2048", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/mnist-784-euclidean/faiss_ivf_flat/nlist2048" + }, + { + "name": "faiss_ivf_flat.nlist4096", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 4096 + }, + "file": "index/mnist-784-euclidean/faiss_ivf_flat/nlist4096", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/mnist-784-euclidean/faiss_ivf_flat/nlist4096" + }, + { + "name": "faiss_ivf_flat.nlist8192", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 8192 + }, + "file": "index/mnist-784-euclidean/faiss_ivf_flat/nlist8192", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/mnist-784-euclidean/faiss_ivf_flat/nlist8192" + }, + { + "name": "faiss_ivf_flat.nlist16384", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 16384 + }, + "file": "index/mnist-784-euclidean/faiss_ivf_flat/nlist16384", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": 
"result/mnist-784-euclidean/faiss_ivf_flat/nlist16384" + }, + { + "name": "faiss_ivf_pq.M64-nlist1024", + "algo": "faiss_gpu_ivf_pq", + "build_param": { + "nlist": 1024, + "M": 64, + "useFloat16": true, + "usePrecomputed": true + }, + "file": "index/mnist-784-euclidean/faiss_ivf_pq/M64-nlist1024", + "search_params": [ + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/mnist-784-euclidean/faiss_ivf_pq/M64-nlist1024" + }, + { + "name": "faiss_ivf_pq.M64-nlist1024.noprecomp", + "algo": "faiss_gpu_ivf_pq", + "build_param": { + "nlist": 1024, + "M": 64, + "useFloat16": true, + "usePrecomputed": false + }, + "file": "index/mnist-784-euclidean/faiss_ivf_pq/M64-nlist1024.noprecomp", + "search_params": [ + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/mnist-784-euclidean/faiss_ivf_pq/M64-nlist1024" + }, + { + "name": "faiss_ivf_sq.nlist1024-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 1024, + "quantizer_type": "fp16" + }, + "file": "index/mnist-784-euclidean/faiss_ivf_sq/nlist1024-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/mnist-784-euclidean/faiss_ivf_sq/nlist1024-fp16" + }, + { + "name": "faiss_ivf_sq.nlist2048-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 2048, + "quantizer_type": "fp16" + }, + "file": "index/mnist-784-euclidean/faiss_ivf_sq/nlist2048-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/mnist-784-euclidean/faiss_ivf_sq/nlist2048-fp16" + }, + { + "name": "faiss_ivf_sq.nlist4096-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 4096, + "quantizer_type": "fp16" + }, + "file": "index/mnist-784-euclidean/faiss_ivf_sq/nlist4096-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/mnist-784-euclidean/faiss_ivf_sq/nlist4096-fp16" + }, + { + "name": "faiss_ivf_sq.nlist8192-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 8192, + "quantizer_type": "fp16" + }, + "file": "index/mnist-784-euclidean/faiss_ivf_sq/nlist8192-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/mnist-784-euclidean/faiss_ivf_sq/nlist8192-fp16" + }, + { + "name": "faiss_ivf_sq.nlist16384-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 16384, + "quantizer_type": "fp16" + }, + "file": "index/mnist-784-euclidean/faiss_ivf_sq/nlist16384-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + 
"nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/mnist-784-euclidean/faiss_ivf_sq/nlist16384-fp16" + }, + { + "name": "faiss_ivf_sq.nlist1024-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 1024, + "quantizer_type": "int8" + }, + "file": "index/mnist-784-euclidean/faiss_ivf_sq/nlist1024-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/mnist-784-euclidean/faiss_ivf_sq/nlist1024-int8" + }, + { + "name": "faiss_ivf_sq.nlist2048-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 2048, + "quantizer_type": "int8" + }, + "file": "index/mnist-784-euclidean/faiss_ivf_sq/nlist2048-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/mnist-784-euclidean/faiss_ivf_sq/nlist2048-int8" + }, + { + "name": "faiss_ivf_sq.nlist4096-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 4096, + "quantizer_type": "int8" + }, + "file": "index/mnist-784-euclidean/faiss_ivf_sq/nlist4096-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/mnist-784-euclidean/faiss_ivf_sq/nlist4096-int8" + }, + { + "name": "faiss_ivf_sq.nlist8192-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 8192, + "quantizer_type": "int8" + }, + "file": "index/mnist-784-euclidean/faiss_ivf_sq/nlist8192-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/mnist-784-euclidean/faiss_ivf_sq/nlist8192-int8" + }, + { + "name": "faiss_ivf_sq.nlist16384-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 16384, + "quantizer_type": "int8" + }, + "file": "index/mnist-784-euclidean/faiss_ivf_sq/nlist16384-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/mnist-784-euclidean/faiss_ivf_sq/nlist16384-int8" + }, + { + "name": "faiss_flat", + "algo": "faiss_gpu_flat", + "build_param": {}, + "file": "index/mnist-784-euclidean/faiss_flat/flat", + "search_params": [ + {} + ], + "search_result_file": "result/mnist-784-euclidean/faiss_flat/flat" + }, + + { + "name": "raft_ivf_pq.dimpq128-cluster1024", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + 
"numProbes": 200, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-float", + "search_params": [ + { + "k": 10, + "numProbes": 1, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 5, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-float" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-half", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-half" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + 
"search_result_file": "result/mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 64, + "ratio": 1, + "niter": 25 + }, + "file": "index/mnist-784-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/mnist-784-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 64, + "ratio": 1, + "niter": 25 + }, + "file": "index/mnist-784-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-half", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/mnist-784-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-half" + }, + { + "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 32, + "ratio": 1, + "niter": 25 + }, + "file": "index/mnist-784-euclidean/raft_ivf_pq/dimpq32-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/mnist-784-euclidean/raft_ivf_pq/dimpq32-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 16, + "ratio": 1, + "niter": 25 + }, + "file": "index/mnist-784-euclidean/raft_ivf_pq/dimpq16-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + 
"internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/mnist-784-euclidean/raft_ivf_pq/dimpq16-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-half-float", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-half-float" + }, + { + "name": "raft_ivf_pq.dimpq512-cluster1024-float-float", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 512, + "ratio": 1, + "niter": 25 + }, + "file": "index/mnist-784-euclidean/raft_ivf_pq/dimpq512-cluster1024-float-float", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/mnist-784-euclidean/raft_ivf_pq/dimpq512-cluster1024-float-float" + }, + { + "name": "raft_ivf_flat.nlist1024", + "algo": "raft_ivf_flat", + "build_param": { + "nlist": 1024, + "ratio": 1, + "niter": 25 + }, + "file": "index/mnist-784-euclidean/raft_ivf_flat/nlist1024", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/mnist-784-euclidean/raft_ivf_flat/nlist1024" + }, + { + "name": "raft_ivf_flat.nlist16384", + "algo": "raft_ivf_flat", + "build_param": { + "nlist": 16384, + "ratio": 2, + "niter": 20 + }, + "file": "index/mnist-784-euclidean/raft_ivf_flat/nlist16384", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/mnist-784-euclidean/raft_ivf_flat/nlist16384" + }, + + { + "name" : "raft_cagra.dim32", + "algo" : "raft_cagra", + 
"build_param": { + "index_dim" : 32 + }, + "file" : "index/mnist-784-euclidean/raft_cagra/dim32", + "search_params" : [ + {"itopk": 32}, + {"itopk": 64}, + {"itopk": 128} + ], + "search_result_file" : "result/mnist-784-euclidean/raft_cagra/dim32" + }, + + { + "name" : "raft_cagra.dim64", + "algo" : "raft_cagra", + "build_param": { + "index_dim" : 64 + }, + "file" : "index/mnist-784-euclidean/raft_cagra/dim64", + "search_params" : [ + {"itopk": 32}, + {"itopk": 64}, + {"itopk": 128} + ], + "search_result_file" : "result/mnist-784-euclidean/raft_cagra/dim64" + } + ] +} diff --git a/python/raft-ann-bench/raft-ann-bench/run/conf/nytimes-256-angular.json b/python/raft-ann-bench/raft-ann-bench/run/conf/nytimes-256-angular.json new file mode 100644 index 0000000000..5d4e19d46b --- /dev/null +++ b/python/raft-ann-bench/raft-ann-bench/run/conf/nytimes-256-angular.json @@ -0,0 +1,1351 @@ +{ + "dataset": { + "name": "nytimes-256-angular", + "base_file": "data/nytimes-256-angular/base.fbin", + "query_file": "data/nytimes-256-angular/query.fbin", + "distance": "euclidean" + }, + "search_basic_param": { + "batch_size": 5000, + "k": 10, + "run_count": 3 + }, + "index": [ + { + "name" : "hnswlib.M12", + "algo" : "hnswlib", + "build_param": {"M":12, "efConstruction":500, "numThreads":32}, + "file" : "index/nytimes-256-angular/hnswlib/M12", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/nytimes-256-angular/hnswlib/M12" + }, + { + "name" : "hnswlib.M16", + "algo" : "hnswlib", + "build_param": {"M":16, "efConstruction":500, "numThreads":32}, + "file" : "index/nytimes-256-angular/hnswlib/M16", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/nytimes-256-angular/hnswlib/M16" + }, + { + "name" : "hnswlib.M24", + "algo" : "hnswlib", + "build_param": {"M":24, "efConstruction":500, "numThreads":32}, + "file" : "index/nytimes-256-angular/hnswlib/M24", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/nytimes-256-angular/hnswlib/M24" + }, + { + "name" : "hnswlib.M36", + "algo" : "hnswlib", + "build_param": {"M":36, "efConstruction":500, "numThreads":32}, + "file" : "index/nytimes-256-angular/hnswlib/M36", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ], + "search_result_file" : "result/nytimes-256-angular/hnswlib/M36" + }, + + + + + { + "name": "raft_bfknn", + "algo": "raft_bfknn", + "build_param": {}, + "file": 
"index/nytimes-256-angular/raft_bfknn/bfknn", + "search_params": [ + { + "probe": 1 + } + ], + "search_result_file": "result/nytimes-256-angular/raft_bfknn/bfknn" + }, + { + "name": "faiss_ivf_flat.nlist1024", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 1024 + }, + "file": "index/nytimes-256-angular/faiss_ivf_flat/nlist1024", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-angular/faiss_ivf_flat/nlist1024" + }, + { + "name": "faiss_ivf_flat.nlist2048", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 2048 + }, + "file": "index/nytimes-256-angular/faiss_ivf_flat/nlist2048", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-angular/faiss_ivf_flat/nlist2048" + }, + { + "name": "faiss_ivf_flat.nlist4096", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 4096 + }, + "file": "index/nytimes-256-angular/faiss_ivf_flat/nlist4096", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-angular/faiss_ivf_flat/nlist4096" + }, + { + "name": "faiss_ivf_flat.nlist8192", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 8192 + }, + "file": "index/nytimes-256-angular/faiss_ivf_flat/nlist8192", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-angular/faiss_ivf_flat/nlist8192" + }, + { + "name": "faiss_ivf_flat.nlist16384", + "algo": "faiss_gpu_ivf_flat", + "build_param": { + "nlist": 16384 + }, + "file": "index/nytimes-256-angular/faiss_ivf_flat/nlist16384", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/nytimes-256-angular/faiss_ivf_flat/nlist16384" + }, + { + "name": "faiss_ivf_pq.M64-nlist1024", + "algo": "faiss_gpu_ivf_pq", + "build_param": { + "nlist": 1024, + "M": 64, + "useFloat16": true, + "usePrecomputed": true + }, + "file": "index/nytimes-256-angular/faiss_ivf_pq/M64-nlist1024", + "search_params": [ + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-angular/faiss_ivf_pq/M64-nlist1024" + }, + { + "name": "faiss_ivf_pq.M64-nlist1024.noprecomp", + "algo": "faiss_gpu_ivf_pq", + "build_param": { + "nlist": 1024, + "M": 64, + "useFloat16": true, + "usePrecomputed": false + }, + "file": "index/nytimes-256-angular/faiss_ivf_pq/M64-nlist1024.noprecomp", + "search_params": [ + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + 
"search_result_file": "result/nytimes-256-angular/faiss_ivf_pq/M64-nlist1024" + }, + { + "name": "faiss_ivf_sq.nlist1024-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 1024, + "quantizer_type": "fp16" + }, + "file": "index/nytimes-256-angular/faiss_ivf_sq/nlist1024-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-angular/faiss_ivf_sq/nlist1024-fp16" + }, + { + "name": "faiss_ivf_sq.nlist2048-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 2048, + "quantizer_type": "fp16" + }, + "file": "index/nytimes-256-angular/faiss_ivf_sq/nlist2048-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-angular/faiss_ivf_sq/nlist2048-fp16" + }, + { + "name": "faiss_ivf_sq.nlist4096-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 4096, + "quantizer_type": "fp16" + }, + "file": "index/nytimes-256-angular/faiss_ivf_sq/nlist4096-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-angular/faiss_ivf_sq/nlist4096-fp16" + }, + { + "name": "faiss_ivf_sq.nlist8192-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 8192, + "quantizer_type": "fp16" + }, + "file": "index/nytimes-256-angular/faiss_ivf_sq/nlist8192-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-angular/faiss_ivf_sq/nlist8192-fp16" + }, + { + "name": "faiss_ivf_sq.nlist16384-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 16384, + "quantizer_type": "fp16" + }, + "file": "index/nytimes-256-angular/faiss_ivf_sq/nlist16384-fp16", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/nytimes-256-angular/faiss_ivf_sq/nlist16384-fp16" + }, + { + "name": "faiss_ivf_sq.nlist1024-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 1024, + "quantizer_type": "int8" + }, + "file": "index/nytimes-256-angular/faiss_ivf_sq/nlist1024-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-angular/faiss_ivf_sq/nlist1024-int8" + }, + { + "name": "faiss_ivf_sq.nlist2048-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 2048, + "quantizer_type": "int8" + }, + "file": "index/nytimes-256-angular/faiss_ivf_sq/nlist2048-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 
+ }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-angular/faiss_ivf_sq/nlist2048-int8" + }, + { + "name": "faiss_ivf_sq.nlist4096-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 4096, + "quantizer_type": "int8" + }, + "file": "index/nytimes-256-angular/faiss_ivf_sq/nlist4096-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-angular/faiss_ivf_sq/nlist4096-int8" + }, + { + "name": "faiss_ivf_sq.nlist8192-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 8192, + "quantizer_type": "int8" + }, + "file": "index/nytimes-256-angular/faiss_ivf_sq/nlist8192-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-angular/faiss_ivf_sq/nlist8192-int8" + }, + { + "name": "faiss_ivf_sq.nlist16384-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": { + "nlist": 16384, + "quantizer_type": "int8" + }, + "file": "index/nytimes-256-angular/faiss_ivf_sq/nlist16384-int8", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/nytimes-256-angular/faiss_ivf_sq/nlist16384-int8" + }, + { + "name": "faiss_flat", + "algo": "faiss_gpu_flat", + "build_param": {}, + "file": "index/nytimes-256-angular/faiss_flat/flat", + "search_params": [ + {} + ], + "search_result_file": "result/nytimes-256-angular/faiss_flat/flat" + }, + + { + "name": "raft_ivf_pq.dimpq128-cluster1024", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/nytimes-256-angular/raft_ivf_pq/dimpq128-cluster1024", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "half", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/nytimes-256-angular/raft_ivf_pq/dimpq128-cluster1024" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/nytimes-256-angular/raft_ivf_pq/dimpq128-cluster1024-float-float", + "search_params": [ + { + "k": 10, + "numProbes": 1, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 5, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 10, + 
"internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/nytimes-256-angular/raft_ivf_pq/dimpq128-cluster1024-float-float" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/nytimes-256-angular/raft_ivf_pq/dimpq128-cluster1024-float-half", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/nytimes-256-angular/raft_ivf_pq/dimpq128-cluster1024-float-half" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/nytimes-256-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/nytimes-256-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 64, + "ratio": 1, + "niter": 25 + }, + "file": "index/nytimes-256-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": 
"result/nytimes-256-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 64, + "ratio": 1, + "niter": 25 + }, + "file": "index/nytimes-256-angular/raft_ivf_pq/dimpq64-cluster1024-float-half", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "half" + } + ], + "search_result_file": "result/nytimes-256-angular/raft_ivf_pq/dimpq64-cluster1024-float-half" + }, + { + "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 32, + "ratio": 1, + "niter": 25 + }, + "file": "index/nytimes-256-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/nytimes-256-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 16, + "ratio": 1, + "niter": 25 + }, + "file": "index/nytimes-256-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "fp8" + } + ], + "search_result_file": "result/nytimes-256-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/nytimes-256-angular/raft_ivf_pq/dimpq128-cluster1024-half-float", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "half", + 
"smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "half", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/nytimes-256-angular/raft_ivf_pq/dimpq128-cluster1024-half-float" + }, + { + "name": "raft_ivf_pq.dimpq512-cluster1024-float-float", + "algo": "raft_ivf_pq", + "build_param": { + "nlist": 1024, + "pq_dim": 512, + "ratio": 1, + "niter": 25 + }, + "file": "index/nytimes-256-angular/raft_ivf_pq/dimpq512-cluster1024-float-float", + "search_params": [ + { + "k": 10, + "numProbes": 10, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 50, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 100, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 200, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 500, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + }, + { + "k": 10, + "numProbes": 1024, + "internalDistanceDtype": "float", + "smemLutDtype": "float" + } + ], + "search_result_file": "result/nytimes-256-angular/raft_ivf_pq/dimpq512-cluster1024-float-float" + }, + { + "name": "raft_ivf_flat.nlist1024", + "algo": "raft_ivf_flat", + "build_param": { + "nlist": 1024, + "ratio": 1, + "niter": 25 + }, + "file": "index/nytimes-256-angular/raft_ivf_flat/nlist1024", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } + ], + "search_result_file": "result/nytimes-256-angular/raft_ivf_flat/nlist1024" + }, + { + "name": "raft_ivf_flat.nlist16384", + "algo": "raft_ivf_flat", + "build_param": { + "nlist": 16384, + "ratio": 2, + "niter": 20 + }, + "file": "index/nytimes-256-angular/raft_ivf_flat/nlist16384", + "search_params": [ + { + "nprobe": 1 + }, + { + "nprobe": 5 + }, + { + "nprobe": 10 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + }, + { + "nprobe": 2000 + } + ], + "search_result_file": "result/nytimes-256-angular/raft_ivf_flat/nlist16384" + }, + + { + "name" : "raft_cagra.dim32", + "algo" : "raft_cagra", + "build_param": { + "index_dim" : 32 + }, + "file" : "index/nytimes-256-angular/raft_cagra/dim32", + "search_params" : [ + {"itopk": 32}, + {"itopk": 64}, + {"itopk": 128} + ], + "search_result_file" : "result/nytimes-256-angular/raft_cagra/dim32" + }, + + { + "name" : "raft_cagra.dim64", + "algo" : "raft_cagra", + "build_param": { + "index_dim" : 64 + }, + "file" : "index/nytimes-256-angular/raft_cagra/dim64", + "search_params" : [ + {"itopk": 32}, + {"itopk": 64}, + {"itopk": 128} + ], + "search_result_file" : "result/nytimes-256-angular/raft_cagra/dim64" + } + ] +} diff --git a/python/raft-ann-bench/raft-ann-bench/run/conf/sift-128-euclidean.json b/python/raft-ann-bench/raft-ann-bench/run/conf/sift-128-euclidean.json index c4b8905b1d..116ea8d557 100644 --- a/python/raft-ann-bench/raft-ann-bench/run/conf/sift-128-euclidean.json +++ b/python/raft-ann-bench/raft-ann-bench/run/conf/sift-128-euclidean.json @@ -1,22 +1,24 @@ { "dataset": { "name": "sift-128-euclidean", - "base_file": 
"/home/cjnolet/workspace/ann_data/sift-128-euclidean/base.fbin", - "query_file": "/home/cjnolet/workspace/ann_data/sift-128-euclidean/query.fbin", + "base_file": "sift-128-euclidean/base.fbin", + "query_file": "sift-128-euclidean/query.fbin", + "groundtruth_neighbors_file": "sift-128-euclidean/groundtruth.neighbors.ibin", "distance": "euclidean" }, + "search_basic_param": { "batch_size": 5000, - "k": 10, - "run_count": 3 + "k": 10 }, + "index": [ { - "name" : "hnswlib.M12", - "algo" : "hnswlib", + "name": "hnswlib.M12", + "algo": "hnswlib", "build_param": {"M":12, "efConstruction":500, "numThreads":32}, - "file" : "index/sift-128-euclidean/hnswlib/M12", - "search_params" : [ + "file": "sift-128-euclidean/hnswlib/M12", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -27,15 +29,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/sift-128-euclidean/hnswlib/M12" + ] }, { - "name" : "hnswlib.M16", - "algo" : "hnswlib", + "name": "hnswlib.M16", + "algo": "hnswlib", "build_param": {"M":16, "efConstruction":500, "numThreads":32}, - "file" : "index/sift-128-euclidean/hnswlib/M16", - "search_params" : [ + "file": "sift-128-euclidean/hnswlib/M16", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -46,15 +47,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/sift-128-euclidean/hnswlib/M16" + ] }, { - "name" : "hnswlib.M24", - "algo" : "hnswlib", + "name": "hnswlib.M24", + "algo": "hnswlib", "build_param": {"M":24, "efConstruction":500, "numThreads":32}, - "file" : "index/sift-128-euclidean/hnswlib/M24", - "search_params" : [ + "file": "sift-128-euclidean/hnswlib/M24", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -65,15 +65,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/sift-128-euclidean/hnswlib/M24" + ] }, { - "name" : "hnswlib.M36", - "algo" : "hnswlib", + "name": "hnswlib.M36", + "algo": "hnswlib", "build_param": {"M":36, "efConstruction":500, "numThreads":32}, - "file" : "index/sift-128-euclidean/hnswlib/M36", - "search_params" : [ + "file": "sift-128-euclidean/hnswlib/M36", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -84,234 +83,109 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/sift-128-euclidean/hnswlib/M36" + ] }, - - - - { "name": "raft_bfknn", "algo": "raft_bfknn", "build_param": {}, - "file": "index/sift-128-euclidean/raft_bfknn/bfknn", - "search_params": [ - { - "probe": 1 - } - ], - "search_result_file": "result/sift-128-euclidean/raft_bfknn/bfknn" + "file": "sift-128-euclidean/raft_bfknn/bfknn", + "search_params": [{"probe": 1}] }, { "name": "faiss_ivf_flat.nlist1024", "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 1024 - }, - "file": "index/sift-128-euclidean/faiss_ivf_flat/nlist1024", + "build_param": {"nlist": 1024}, + "file": "sift-128-euclidean/faiss_ivf_flat/nlist1024", "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": 
"result/sift-128-euclidean/faiss_ivf_flat/nlist1024" + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] }, { "name": "faiss_ivf_flat.nlist2048", "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 2048 - }, - "file": "index/sift-128-euclidean/faiss_ivf_flat/nlist2048", + "build_param": {"nlist": 2048}, + "file": "sift-128-euclidean/faiss_ivf_flat/nlist2048", "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_flat/nlist2048" + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] }, { "name": "faiss_ivf_flat.nlist4096", "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 4096 - }, - "file": "index/sift-128-euclidean/faiss_ivf_flat/nlist4096", + "build_param": {"nlist": 4096}, + "file": "sift-128-euclidean/faiss_ivf_flat/nlist4096", "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_flat/nlist4096" + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] }, { "name": "faiss_ivf_flat.nlist8192", "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 8192 - }, - "file": "index/sift-128-euclidean/faiss_ivf_flat/nlist8192", + "build_param": {"nlist": 8192}, + "file": "sift-128-euclidean/faiss_ivf_flat/nlist8192", "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_flat/nlist8192" + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] }, { "name": "faiss_ivf_flat.nlist16384", "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 16384 - }, - "file": "index/sift-128-euclidean/faiss_ivf_flat/nlist16384", + "build_param": {"nlist": 16384}, + "file": "sift-128-euclidean/faiss_ivf_flat/nlist16384", "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_flat/nlist16384" + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000}, + {"nprobe": 2000} + ] }, { "name": "faiss_ivf_pq.M64-nlist1024", "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": true - }, - "file": "index/sift-128-euclidean/faiss_ivf_pq/M64-nlist1024", + "build_param": {"nlist": 1024, "M": 64, "useFloat16": true, "usePrecomputed": true}, + "file": "sift-128-euclidean/faiss_ivf_pq/M64-nlist1024", "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - 
"nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_pq/M64-nlist1024" + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] }, { "name": "faiss_ivf_pq.M64-nlist1024.noprecomp", @@ -322,1030 +196,303 @@ "useFloat16": true, "usePrecomputed": false }, - "file": "index/sift-128-euclidean/faiss_ivf_pq/M64-nlist1024.noprecomp", + "file": "sift-128-euclidean/faiss_ivf_pq/M64-nlist1024.noprecomp", "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_pq/M64-nlist1024" + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] }, { "name": "faiss_ivf_sq.nlist1024-fp16", "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 1024, - "quantizer_type": "fp16" - }, - "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist1024-fp16", + "build_param": {"nlist": 1024, "quantizer_type": "fp16"}, + "file": "sift-128-euclidean/faiss_ivf_sq/nlist1024-fp16", "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist1024-fp16" + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] }, { "name": "faiss_ivf_sq.nlist2048-fp16", "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 2048, - "quantizer_type": "fp16" - }, - "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist2048-fp16", + "build_param": {"nlist": 2048, "quantizer_type": "fp16"}, + "file": "sift-128-euclidean/faiss_ivf_sq/nlist2048-fp16", "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist2048-fp16" + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] }, { "name": "faiss_ivf_sq.nlist4096-fp16", "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 4096, - "quantizer_type": "fp16" - }, - "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist4096-fp16", + "build_param": {"nlist": 4096, "quantizer_type": "fp16"}, + "file": "sift-128-euclidean/faiss_ivf_sq/nlist4096-fp16", "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist4096-fp16" + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] }, { "name": "faiss_ivf_sq.nlist8192-fp16", "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 8192, - "quantizer_type": "fp16" - }, - "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist8192-fp16", + "build_param": {"nlist": 8192, "quantizer_type": "fp16"}, + "file": 
"sift-128-euclidean/faiss_ivf_sq/nlist8192-fp16", "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist8192-fp16" + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] }, { "name": "faiss_ivf_sq.nlist16384-fp16", "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 16384, - "quantizer_type": "fp16" - }, - "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist16384-fp16", + "build_param": {"nlist": 16384, "quantizer_type": "fp16"}, + "file": "sift-128-euclidean/faiss_ivf_sq/nlist16384-fp16", "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist16384-fp16" + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000}, + {"nprobe": 2000} + ] }, { "name": "faiss_ivf_sq.nlist1024-int8", "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 1024, - "quantizer_type": "int8" - }, - "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist1024-int8", + "build_param": {"nlist": 1024, "quantizer_type": "int8"}, + "file": "sift-128-euclidean/faiss_ivf_sq/nlist1024-int8", "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist1024-int8" + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] }, { "name": "faiss_ivf_sq.nlist2048-int8", "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 2048, - "quantizer_type": "int8" - }, - "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist2048-int8", + "build_param": {"nlist": 2048,"quantizer_type": "int8"}, + "file": "sift-128-euclidean/faiss_ivf_sq/nlist2048-int8", "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist2048-int8" + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] }, { "name": "faiss_ivf_sq.nlist4096-int8", "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 4096, - "quantizer_type": "int8" - }, - "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist4096-int8", + "build_param": {"nlist": 4096, "quantizer_type": "int8"}, + "file": "sift-128-euclidean/faiss_ivf_sq/nlist4096-int8", "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist4096-int8" + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + 
{"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] }, { "name": "faiss_ivf_sq.nlist8192-int8", "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 8192, - "quantizer_type": "int8" - }, - "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist8192-int8", + "build_param": {"nlist": 8192, "quantizer_type": "int8"}, + "file": "sift-128-euclidean/faiss_ivf_sq/nlist8192-int8", "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist8192-int8" + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] }, { "name": "faiss_ivf_sq.nlist16384-int8", "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 16384, - "quantizer_type": "int8" - }, - "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist16384-int8", + "build_param": {"nlist": 16384, "quantizer_type": "int8"}, + "file": "sift-128-euclidean/faiss_ivf_sq/nlist16384-int8", "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist16384-int8" + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000}, + {"nprobe": 2000} + ] }, { "name": "faiss_flat", "algo": "faiss_gpu_flat", "build_param": {}, - "file": "index/sift-128-euclidean/faiss_flat/flat", - "search_params": [ - {} - ], - "search_result_file": "result/sift-128-euclidean/faiss_flat/flat" - }, - - { - "name": "raft_ivf_pq.dimpq128-cluster1024", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024" + "file": "sift-128-euclidean/faiss_flat/flat", + "search_params": [{}] }, { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", + "name": "raft_ivf_pq.dimpq64-bitpq8-cluster1K", "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-float", + "build_param": {"niter": 25, "nlist": 1000, "pq_dim": 64, "pq_bits": 8, "ratio": 1}, + "file": "sift-128-euclidean/raft_ivf_pq/dimpq64-bitpq8-cluster1K", "search_params": [ - { - "k": 10, - "numProbes": 1, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1, 
- "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 5, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-float" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", + { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 1000, "internalDistanceDtype": "half", "smemLutDtype": "half" } + ] + }, + { + "name": "raft_ivf_pq.dimpq128-bitpq6-cluster1K", "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-half", + "build_param": {"niter": 25, "nlist": 1000, "pq_dim": 128, "pq_bits": 6, "ratio": 1}, + "file": "sift-128-euclidean/raft_ivf_pq/dimpq128-bitpq6-cluster1K", "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - 
}, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-half", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 32, - "ratio": 1, - "niter": 25 - }, - "file": 
"index/sift-128-euclidean/raft_ivf_pq/dimpq32-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq32-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 16, - "ratio": 1, - "niter": 25 - }, - "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq16-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq16-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024-half-float", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024-half-float" - }, - { - "name": "raft_ivf_pq.dimpq512-cluster1024-float-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 512, - "ratio": 1, - "niter": 25 - }, - "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq512-cluster1024-float-float", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - 
"numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq512-cluster1024-float-float" + { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 1000, "internalDistanceDtype": "half", "smemLutDtype": "half" } + ] }, { "name": "raft_ivf_flat.nlist1024", "algo": "raft_ivf_flat", - "build_param": { - "nlist": 1024, - "ratio": 1, - "niter": 25 - }, - "file": "index/sift-128-euclidean/raft_ivf_flat/nlist1024", + "build_param": {"nlist": 1024, "ratio": 1, "niter": 25}, + "file": "sift-128-euclidean/raft_ivf_flat/nlist1024", "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/sift-128-euclidean/raft_ivf_flat/nlist1024" + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] }, { "name": "raft_ivf_flat.nlist16384", "algo": "raft_ivf_flat", - "build_param": { - "nlist": 16384, - "ratio": 2, - "niter": 20 - }, - "file": "index/sift-128-euclidean/raft_ivf_flat/nlist16384", + "build_param": {"nlist": 16384, "ratio": 2, "niter": 20}, + "file": "sift-128-euclidean/raft_ivf_flat/nlist16384", + "search_params": [ + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000}, + {"nprobe": 2000} + ] + }, + { + "name": "raft_cagra.dim32", + "algo": "raft_cagra", + "build_param": {"index_dim": 32}, + "file": "sift-128-euclidean/raft_cagra/dim32", "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - 
"nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/sift-128-euclidean/raft_ivf_flat/nlist16384" - }, - - { - "name" : "raft_cagra.dim32", - "algo" : "raft_cagra", - "build_param": { - "index_dim" : 32 - }, - "file" : "index/sift-128-euclidean/raft_cagra/dim32", - "search_params" : [ {"itopk": 32}, {"itopk": 64}, {"itopk": 128} - ], - "search_result_file" : "result/sift-128-euclidean/raft_cagra/dim32" + ] }, - { - "name" : "raft_cagra.dim64", - "algo" : "raft_cagra", - "build_param": { - "index_dim" : 64 - }, - "file" : "index/sift-128-euclidean/raft_cagra/dim64", - "search_params" : [ + "name": "raft_cagra.dim64", + "algo": "raft_cagra", + "build_param": {"index_dim": 64}, + "file": "sift-128-euclidean/raft_cagra/dim64", + "search_params": [ {"itopk": 32}, {"itopk": 64}, {"itopk": 128} - ], - "search_result_file" : "result/sift-128-euclidean/raft_cagra/dim64" + ] } ] } From 0eaa7e06c034ea84eec261cda92488ab88108215 Mon Sep 17 00:00:00 2001 From: achirkin Date: Fri, 25 Aug 2023 08:41:28 +0200 Subject: [PATCH 15/70] Fix FAISS using a destroyed stream from previous benchmark case --- cpp/bench/ann/src/faiss/faiss_wrapper.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/cpp/bench/ann/src/faiss/faiss_wrapper.h b/cpp/bench/ann/src/faiss/faiss_wrapper.h index 0dc06f99a5..7a3f91853f 100644 --- a/cpp/bench/ann/src/faiss/faiss_wrapper.h +++ b/cpp/bench/ann/src/faiss/faiss_wrapper.h @@ -84,6 +84,7 @@ class FaissGpu : public ANN { }; FaissGpu(Metric metric, int dim, int nlist); + virtual ~FaissGpu() noexcept { RAFT_CUDA_TRY_NO_THROW(cudaEventDestroy(sync_)); } void build(const T* dataset, size_t nrow, cudaStream_t stream = 0) final; @@ -114,11 +115,19 @@ class FaissGpu : public ANN { template void load_(const std::string& file); + void stream_wait(cudaStream_t stream) const + { + RAFT_CUDA_TRY(cudaEventRecord(sync_, faiss_default_stream_)); + RAFT_CUDA_TRY(cudaStreamWaitEvent(stream, sync_)); + } + mutable faiss::gpu::StandardGpuResources gpu_resource_; std::unique_ptr index_; faiss::MetricType metric_type_; int nlist_; int device_; + cudaEvent_t sync_{nullptr}; + cudaStream_t faiss_default_stream_{nullptr}; }; template @@ -127,6 +136,8 @@ FaissGpu::FaissGpu(Metric metric, int dim, int nlist) { static_assert(std::is_same_v, "faiss support only float type"); RAFT_CUDA_TRY(cudaGetDevice(&device_)); + RAFT_CUDA_TRY(cudaEventCreate(&sync_, cudaEventDisableTiming)); + faiss_default_stream_ = gpu_resource_.getDefaultStream(device_); } template @@ -134,10 +145,10 @@ void FaissGpu::build(const T* dataset, size_t nrow, cudaStream_t stream) { OmpSingleThreadScope omp_single_thread; - gpu_resource_.setDefaultStream(device_, stream); index_->train(nrow, dataset); // faiss::gpu::GpuIndexFlat::train() will do nothing assert(index_->is_trained); index_->add(nrow, dataset); + stream_wait(stream); } template @@ -158,9 +169,9 @@ void FaissGpu::search(const T* queries, { static_assert(sizeof(size_t) == sizeof(faiss::Index::idx_t), "sizes of size_t and faiss::Index::idx_t are different"); - gpu_resource_.setDefaultStream(device_, stream); index_->search( batch_size, queries, k, distances, reinterpret_cast(neighbors)); + stream_wait(stream); } template From 4062d6feaa14b3504d3c9c3bec09c795e87573b9 Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Fri, 25 Aug 2023 10:54:13 -0400 Subject: [PATCH 16/70] Fixing issue in conf file and stubbing out parameter tuning guide --- bench/ann/conf/deep-100M.json | 1 + docs/source/ann_benchmarks_param_tuning.md | 32 ++++++++++++++++++---- 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/bench/ann/conf/deep-100M.json b/bench/ann/conf/deep-100M.json index 6591957961..1b82f424cc 100644 --- a/bench/ann/conf/deep-100M.json +++ b/bench/ann/conf/deep-100M.json @@ -240,6 +240,7 @@ {"itopk": 384, "search_width": 8, "max_iterations": 48, "algo": "multi_cta"}, {"itopk": 512, "search_width": 8, "max_iterations": 64, "algo": "multi_cta"} ] + }, { "name": "raft_cagra.dim32.multi_kernel", "algo": "raft_cagra", diff --git a/docs/source/ann_benchmarks_param_tuning.md b/docs/source/ann_benchmarks_param_tuning.md index 1391f261cb..8efac4014d 100644 --- a/docs/source/ann_benchmarks_param_tuning.md +++ b/docs/source/ann_benchmarks_param_tuning.md @@ -3,11 +3,31 @@ This guide outlines the various parameter settings that can be specified in [RAFT ANN Benchmark](raft_ann_benchmarks.md) json configuration files and explains the impact they have on corresponding algorithms to help inform their settings for benchmarking across desired levels of recall. -| Algorithm | Parameter Options | -|---------------------|----------------------------------------------| -| `faiss_gpu_ivf_flat` | `{ }` | `faiss_gpu_ivf_flat`, `faiss_gpu_ivf_pq` | -| GGNN | `ggnn` | -| HNSWlib | `hnswlib` | -| RAFT | `raft_cagra`, `raft_ivf_flat`, `raft_ivf_pq` | +## RAFT Indexes +### IVF-Flat +| Parameter | Type | Data Type | Description | +|-----------|----------------|-----------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `nlists` | `build_param` | Positive Integer `>0` | Number of clusters to partition the vectors into. Larger values will put less points into each cluster but this will impact index build time as more clusters need to be trained. | +| `nprobe` | `search_params` | Positive Integer `>0` | The closest number of clusters to search for each query vector. Larger values will improve recall but will search more points in the index. | + +### IVF-PQ + +| Parameter | Type | Data Type | Description | +|-----------|---------------------------------------|-----------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `nlists` | `build_param` | Positive Integer `>0` | Number of clusters to partition the vectors into. Larger values will put less points into each cluster but this will impact index build time as more clusters need to be trained. | +| `nprobe` | `search_params` | Positive Integer `>0` | The closest number of clusters to search for each query vector. Larger values will improve recall but will search more points in the index. | + + +## FAISS Indexes + +### IVF-Flat + +### IVF-PQ + + +## HNSWLib Index + + +## GGNN Index \ No newline at end of file From 7141c219b60b599420489b4882cffb563a26c11a Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Fri, 25 Aug 2023 10:55:24 -0400 Subject: [PATCH 17/70] Adding CAGRA to tuning guide --- docs/source/ann_benchmarks_param_tuning.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/source/ann_benchmarks_param_tuning.md b/docs/source/ann_benchmarks_param_tuning.md index 8efac4014d..80495b4ac1 100644 --- a/docs/source/ann_benchmarks_param_tuning.md +++ b/docs/source/ann_benchmarks_param_tuning.md @@ -20,6 +20,9 @@ This guide outlines the various parameter settings that can be specified in [RAF | `nprobe` | `search_params` | Positive Integer `>0` | The closest number of clusters to search for each query vector. Larger values will improve recall but will search more points in the index. | +### CAGRA + + ## FAISS Indexes ### IVF-Flat @@ -27,6 +30,7 @@ This guide outlines the various parameter settings that can be specified in [RAF ### IVF-PQ + ## HNSWLib Index From 7c42a78cb7e57532e37d75fdfd7b6f18d4d7f1e8 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Fri, 25 Aug 2023 11:10:38 -0400 Subject: [PATCH 18/70] Adding ivf-flat description to tuning guide --- conda/environments/bench_ann_cuda-118_arch-x86_64.yaml | 1 + dependencies.yaml | 1 + docs/source/ann_benchmarks_param_tuning.md | 4 ++++ docs/source/raft_ann_benchmarks.md | 2 ++ 4 files changed, 8 insertions(+) diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index d62404b16f..37a4042aac 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -7,6 +7,7 @@ channels: - conda-forge - nvidia dependencies: +- benchmark>=1.8.2 - c-compiler - clang-tools=16.0.1 - clang=16.0.1 diff --git a/dependencies.yaml b/dependencies.yaml index cf8170b9a1..9a0807143c 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -168,6 +168,7 @@ dependencies: - glog>=0.6.0 - h5py>=3.8.0 - libfaiss>=1.7.1 + - benchmark>=1.8.2 - faiss-proc=*=cuda - matplotlib - pyyaml diff --git a/docs/source/ann_benchmarks_param_tuning.md b/docs/source/ann_benchmarks_param_tuning.md index 80495b4ac1..f5df2bf9ba 100644 --- a/docs/source/ann_benchmarks_param_tuning.md +++ b/docs/source/ann_benchmarks_param_tuning.md @@ -7,6 +7,10 @@ This guide outlines the various parameter settings that can be specified in [RAF ### IVF-Flat +IVF-flat uses an inverted-file index, which partitions the vectors into a series of clusters, or lists, storing them in an interleaved format which is optimized for fast distance computation. The searching of an IVF-flat index reduces the total vectors in the index to those within some user-specified nearest clusters called probes. + +IVF-flat is a simple algorithm which won't save any space, but it provides competitive search times even at higher levels of recall. + | Parameter | Type | Data Type | Description | |-----------|----------------|-----------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | `nlists` | `build_param` | Positive Integer `>0` | Number of clusters to partition the vectors into. Larger values will put less points into each cluster but this will impact index build time as more clusters need to be trained. 
| diff --git a/docs/source/raft_ann_benchmarks.md b/docs/source/raft_ann_benchmarks.md index f4af3c8787..e242f8b655 100644 --- a/docs/source/raft_ann_benchmarks.md +++ b/docs/source/raft_ann_benchmarks.md @@ -158,6 +158,8 @@ options: --algorithms ALGORITHMS run only comma separated list of named algorithms (default: None) --indices INDICES run only comma separated list of named indices. parameter `algorithms` is ignored (default: None) + -k, --count number of nearest neighbors to return + --batch-size number of query vectors to pass into search -f, --force re-run algorithms even if their results already exist (default: False) ``` `configuration` and `dataset` : `configuration` is a path to a configuration file for a given dataset. From 92a37a8fd1ac7ba68291c142645f86ed9f49895c Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Fri, 25 Aug 2023 11:32:28 -0400 Subject: [PATCH 19/70] Updating ivf-flat and ivf-pq --- docs/source/ann_benchmarks_param_tuning.md | 56 +++++++++++++++++----- 1 file changed, 43 insertions(+), 13 deletions(-) diff --git a/docs/source/ann_benchmarks_param_tuning.md b/docs/source/ann_benchmarks_param_tuning.md index f5df2bf9ba..c19f7faaca 100644 --- a/docs/source/ann_benchmarks_param_tuning.md +++ b/docs/source/ann_benchmarks_param_tuning.md @@ -5,7 +5,42 @@ This guide outlines the various parameter settings that can be specified in [RAF ## RAFT Indexes -### IVF-Flat +### `raft_ivf_flat` + +IVF-flat uses an inverted-file index, which partitions the vectors into a series of clusters, or lists, storing them in an interleaved format which is optimized for fast distance computation. The searching of an IVF-flat index reduces the total vectors in the index to those within some user-specified nearest clusters called probes. + +IVF-flat is a simple algorithm which won't save any space, but it provides competitive search times even at higher levels of recall. + +| Parameter | Type | Data Type | Description | +|-----------|---------------------------------------|-----------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `nlists` | `build_param` | Positive Integer `>0` | Number of clusters to partition the vectors into. Larger values will put less points into each cluster but this will impact index build time as more clusters need to be trained. | +| `ratio` | `build_param` | Positive Float `>0` | Fraction of the number of training points which should be used to train the clusters. | +| `nprobe` | `search_params` | Positive Integer `>0` | The closest number of clusters to search for each query vector. Larger values will improve recall but will search more points in the index. | + + +### `raft_ivf_pq` + +IVF-pq is an inverted-file index, which partitions the vectors into a series of clusters, or lists, in a similar way to IVF-flat above. The difference is that IVF-PQ uses product quantization to also compress the vectors, giving the index a smaller memory footprint. Unfortunately, higher levels of compression can also shrink recall, which a refinement step can improve when the original vectors are still available. 
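+
+For reference, a configuration entry for this index type in one of the benchmark JSON files takes roughly the following shape (a sketch adapted from the `sift-128-euclidean.json` config; the values shown are illustrative only, and the `build_param`/`search_params` fields are described in the table below):
+
+```json
+{
+  "name": "raft_ivf_pq.dimpq128-cluster1024",
+  "algo": "raft_ivf_pq",
+  "build_param": {"nlist": 1024, "pq_dim": 128, "ratio": 1, "niter": 25},
+  "file": "sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024",
+  "search_params": [
+    {"nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half"},
+    {"nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}
+  ]
+}
+```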
+ + +| Parameter | Type | Data Type | Description | +|-------------------------|----------------|----------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `nlists` | `build_param` | Positive Integer `>0` | Number of clusters to partition the vectors into. Larger values will put less points into each cluster but this will impact index build time as more clusters need to be trained. | +| `niter` | `build_param` | Positive Integer `>0` | Number of k-means iterations to use when training the clusters. | +| `pq_dim` | `build_param` | Positive Integer. Multiple of 8. | Dimensionality of the vector after product quantization. When 0, a heuristic is used to select this value. `pq_dim` * `pq_bits` must be a multiple of 8. | +| `pq_bits` | `build_param` | Positive Integer. `4-8` | Bit length of the vector element after quantization. | +| `nprobe` | `search_params` | Positive Integer `>0` | The closest number of clusters to search for each query vector. Larger values will improve recall but will search more points in the index. | +| `internalDistanceDtype` | `search_params` | [`float`, `half`] | The precision to use for the distance computations. Lower precision can increase performance at the cost of accuracy. | +| `smemLutDtype` | `search_params` | [`float`, `half`, `fp8`] | The precision to use for the lookup table in shared memory. Lower precision can increase performance at the cost of accuracy. | +| `refine_ratio` | `search_params` | Positive Number `>=0` | `refine_ratio * k` nearest neighbors are queried from the index initially and an additional refinement step improves recall by selecting only the best `k` neighbors. . | + + +### `raft_cagra` + + +## FAISS Indexes + +### `faiss_gpu_ivf_flat` IVF-flat uses an inverted-file index, which partitions the vectors into a series of clusters, or lists, storing them in an interleaved format which is optimized for fast distance computation. The searching of an IVF-flat index reduces the total vectors in the index to those within some user-specified nearest clusters called probes. @@ -16,7 +51,9 @@ IVF-flat is a simple algorithm which won't save any space, but it provides compe | `nlists` | `build_param` | Positive Integer `>0` | Number of clusters to partition the vectors into. Larger values will put less points into each cluster but this will impact index build time as more clusters need to be trained. | | `nprobe` | `search_params` | Positive Integer `>0` | The closest number of clusters to search for each query vector. Larger values will improve recall but will search more points in the index. | -### IVF-PQ +### `faiss_gpu_ivf_pq` + +IVF-pq is an inverted-file index, which partitions the vectors into a series of clusters, or lists, in a similar way to IVF-flat above. The difference is that IVF-PQ uses product quantization to also compress the vectors, giving the index a smaller memory footprint. Unfortunately, higher levels of compression can also shrink recall, which a refinement step can improve when the original vectors are still available. 
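+
+A minimal sketch of a corresponding configuration entry (loosely following the `deep-100M.json` config; the values are illustrative only):
+
+```json
+{
+  "name": "faiss_ivf_pq.M48-nlist16K",
+  "algo": "faiss_gpu_ivf_pq",
+  "build_param": {"nlist": 16384, "M": 48},
+  "file": "deep-100M/faiss_ivf_pq/M48-nlist16K",
+  "search_params": [{"nprobe": 10}, {"nprobe": 50}, {"nprobe": 200}]
+}
+```
+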
| Parameter | Type | Data Type | Description | |-----------|---------------------------------------|-----------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| @@ -24,18 +61,11 @@ IVF-flat is a simple algorithm which won't save any space, but it provides compe | `nprobe` | `search_params` | Positive Integer `>0` | The closest number of clusters to search for each query vector. Larger values will improve recall but will search more points in the index. | -### CAGRA - - -## FAISS Indexes - -### IVF-Flat - -### IVF-PQ - +## HNSW -## HNSWLib Index +### `hnswlib` +## GGNN Index -## GGNN Index \ No newline at end of file +### `ggnn` From 39828401988b8178725dbd06b8f6404b6debc7c8 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Fri, 25 Aug 2023 12:02:12 -0400 Subject: [PATCH 20/70] Adding tuning guide tables for ivf-flat and ivf-pq for faiss and raft --- docs/source/ann_benchmarks_param_tuning.md | 52 ++++++++++++---------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/docs/source/ann_benchmarks_param_tuning.md b/docs/source/ann_benchmarks_param_tuning.md index c19f7faaca..d5a4f78db0 100644 --- a/docs/source/ann_benchmarks_param_tuning.md +++ b/docs/source/ann_benchmarks_param_tuning.md @@ -11,28 +11,28 @@ IVF-flat uses an inverted-file index, which partitions the vectors into a series IVF-flat is a simple algorithm which won't save any space, but it provides competitive search times even at higher levels of recall. -| Parameter | Type | Data Type | Description | -|-----------|---------------------------------------|-----------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `nlists` | `build_param` | Positive Integer `>0` | Number of clusters to partition the vectors into. Larger values will put less points into each cluster but this will impact index build time as more clusters need to be trained. | -| `ratio` | `build_param` | Positive Float `>0` | Fraction of the number of training points which should be used to train the clusters. | -| `nprobe` | `search_params` | Positive Integer `>0` | The closest number of clusters to search for each query vector. Larger values will improve recall but will search more points in the index. | +| Parameter | Type | Required | Data Type | Default | Description | +|-----------|------------------|----------|---------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `nlists` | `build_param` | Y | Positive Integer >0 | | Number of clusters to partition the vectors into. Larger values will put less points into each cluster but this will impact index build time as more clusters need to be trained. | +| `niter` | `build_param` | N | Positive Integer >0 | 20 | Number of clusters to partition the vectors into. Larger values will put less points into each cluster but this will impact index build time as more clusters need to be trained. | +| `ratio` | `build_param` | Y | Positive Float >0 | 0.5 | Fraction of the number of training points which should be used to train the clusters. | +| `nprobe` | `search_params` | Y | Positive Integer >0 | | The closest number of clusters to search for each query vector. 
Larger values will improve recall but will search more points in the index. | ### `raft_ivf_pq` IVF-pq is an inverted-file index, which partitions the vectors into a series of clusters, or lists, in a similar way to IVF-flat above. The difference is that IVF-PQ uses product quantization to also compress the vectors, giving the index a smaller memory footprint. Unfortunately, higher levels of compression can also shrink recall, which a refinement step can improve when the original vectors are still available. - -| Parameter | Type | Data Type | Description | -|-------------------------|----------------|----------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `nlists` | `build_param` | Positive Integer `>0` | Number of clusters to partition the vectors into. Larger values will put less points into each cluster but this will impact index build time as more clusters need to be trained. | -| `niter` | `build_param` | Positive Integer `>0` | Number of k-means iterations to use when training the clusters. | -| `pq_dim` | `build_param` | Positive Integer. Multiple of 8. | Dimensionality of the vector after product quantization. When 0, a heuristic is used to select this value. `pq_dim` * `pq_bits` must be a multiple of 8. | -| `pq_bits` | `build_param` | Positive Integer. `4-8` | Bit length of the vector element after quantization. | -| `nprobe` | `search_params` | Positive Integer `>0` | The closest number of clusters to search for each query vector. Larger values will improve recall but will search more points in the index. | -| `internalDistanceDtype` | `search_params` | [`float`, `half`] | The precision to use for the distance computations. Lower precision can increase performance at the cost of accuracy. | -| `smemLutDtype` | `search_params` | [`float`, `half`, `fp8`] | The precision to use for the lookup table in shared memory. Lower precision can increase performance at the cost of accuracy. | -| `refine_ratio` | `search_params` | Positive Number `>=0` | `refine_ratio * k` nearest neighbors are queried from the index initially and an additional refinement step improves recall by selecting only the best `k` neighbors. . | +| Parameter | Type | Required | Data Type | Default | Description | +|-------------------------|----------------|---|------------------------------|---------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `nlists` | `build_param` | Y | Positive Integer >0 | | Number of clusters to partition the vectors into. Larger values will put less points into each cluster but this will impact index build time as more clusters need to be trained. | +| `niter` | `build_param` | N | Positive Integer >0 | 20 | Number of k-means iterations to use when training the clusters. | +| `pq_dim` | `build_param` | N | Positive Integer. Multiple of 8. | 0 | Dimensionality of the vector after product quantization. When 0, a heuristic is used to select this value. `pq_dim` * `pq_bits` must be a multiple of 8. | +| `pq_bits` | `build_param` | N | Positive Integer. [4-8] | 8 | Bit length of the vector element after quantization. | +| `nprobe` | `search_params` | Y | Positive Integer >0 | | The closest number of clusters to search for each query vector. 
Larger values will improve recall but will search more points in the index. | +| `internalDistanceDtype` | `search_params` | N | [`float`, `half`] | `half` | The precision to use for the distance computations. Lower precision can increase performance at the cost of accuracy. | +| `smemLutDtype` | `search_params` | N | [`float`, `half`, `fp8`] | `half` | The precision to use for the lookup table in shared memory. Lower precision can increase performance at the cost of accuracy. | +| `refine_ratio` | `search_params` | N| Positive Number >=0 | 0 | `refine_ratio * k` nearest neighbors are queried from the index initially and an additional refinement step improves recall by selecting only the best `k` neighbors. | ### `raft_cagra` @@ -46,19 +46,23 @@ IVF-flat uses an inverted-file index, which partitions the vectors into a series IVF-flat is a simple algorithm which won't save any space, but it provides competitive search times even at higher levels of recall. -| Parameter | Type | Data Type | Description | -|-----------|----------------|-----------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `nlists` | `build_param` | Positive Integer `>0` | Number of clusters to partition the vectors into. Larger values will put less points into each cluster but this will impact index build time as more clusters need to be trained. | -| `nprobe` | `search_params` | Positive Integer `>0` | The closest number of clusters to search for each query vector. Larger values will improve recall but will search more points in the index. | +| Parameter | Type | Required | Data Type | Default | Description | +|-----------|----------------|----------|---------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `nlists` | `build_param` | Y | Positive Integer >0 | | Number of clusters to partition the vectors into. Larger values will put less points into each cluster but this will impact index build time as more clusters need to be trained. | +| `nprobe` | `search_params` | Y | Positive Integer >0 | | The closest number of clusters to search for each query vector. Larger values will improve recall but will search more points in the index. | ### `faiss_gpu_ivf_pq` IVF-pq is an inverted-file index, which partitions the vectors into a series of clusters, or lists, in a similar way to IVF-flat above. The difference is that IVF-PQ uses product quantization to also compress the vectors, giving the index a smaller memory footprint. Unfortunately, higher levels of compression can also shrink recall, which a refinement step can improve when the original vectors are still available. -| Parameter | Type | Data Type | Description | -|-----------|---------------------------------------|-----------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `nlists` | `build_param` | Positive Integer `>0` | Number of clusters to partition the vectors into. Larger values will put less points into each cluster but this will impact index build time as more clusters need to be trained. | -| `nprobe` | `search_params` | Positive Integer `>0` | The closest number of clusters to search for each query vector. 
Larger values will improve recall but will search more points in the index. | +| Parameter | Type | Required | Data Type | Default | Description | +|-------------------------|----------------|----------|----------------------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `nlists` | `build_param` | Y | Positive Integer >0 | | Number of clusters to partition the vectors into. Larger values will put less points into each cluster but this will impact index build time as more clusters need to be trained. | +| `M` | `build_param` | Y | Positive Integer Power of 2 [8-64] | | Number of chunks or subquantizers for each vector. | +| `usePrecomputed` | `build_param` | N | Boolean. Default=`false` | `false` | Use pre-computed lookup tables to speed up search at the cost of increased memory usage. | +| `useFloat16` | `build_param` | N | Boolean. Default=`false` | `false` | Use half-precision floats for clustering step. | +| `nprobe` | `search_params` | Y | Positive Integer >0 | | The closest number of clusters to search for each query vector. Larger values will improve recall but will search more points in the index. | + From d2bfc11c63b3e12c0e6881b60ee04feb3e757a38 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Fri, 25 Aug 2023 12:04:13 -0400 Subject: [PATCH 21/70] Reatio is not required --- docs/source/ann_benchmarks_param_tuning.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/ann_benchmarks_param_tuning.md b/docs/source/ann_benchmarks_param_tuning.md index d5a4f78db0..f5801322d3 100644 --- a/docs/source/ann_benchmarks_param_tuning.md +++ b/docs/source/ann_benchmarks_param_tuning.md @@ -15,7 +15,7 @@ IVF-flat is a simple algorithm which won't save any space, but it provides compe |-----------|------------------|----------|---------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | `nlists` | `build_param` | Y | Positive Integer >0 | | Number of clusters to partition the vectors into. Larger values will put less points into each cluster but this will impact index build time as more clusters need to be trained. | | `niter` | `build_param` | N | Positive Integer >0 | 20 | Number of clusters to partition the vectors into. Larger values will put less points into each cluster but this will impact index build time as more clusters need to be trained. | -| `ratio` | `build_param` | Y | Positive Float >0 | 0.5 | Fraction of the number of training points which should be used to train the clusters. | +| `ratio` | `build_param` | N | Positive Float >0 | 0.5 | Fraction of the number of training points which should be used to train the clusters. | | `nprobe` | `search_params` | Y | Positive Integer >0 | | The closest number of clusters to search for each query vector. Larger values will improve recall but will search more points in the index. 
| From 31594e780e43932a156f1aa6f1e676e1c424dc67 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Fri, 25 Aug 2023 12:28:33 -0500 Subject: [PATCH 22/70] FIX changes that got lost during rebasing --- bench/ann/algos.yaml | 30 - bench/ann/conf/bigann-100M.json | 196 --- bench/ann/conf/deep-100M.json | 286 ---- bench/ann/conf/deep-1B.json | 34 - bench/ann/conf/deep-image-96-angular.json | 1366 ----------------- .../ann/conf/fashion-mnist-784-euclidean.json | 1366 ----------------- bench/ann/conf/gist-960-euclidean.json | 1351 ---------------- bench/ann/conf/glove-100-angular.json | 1351 ---------------- bench/ann/conf/glove-100-inner.json | 713 --------- bench/ann/conf/glove-50-angular.json | 1351 ---------------- bench/ann/conf/lastfm-65-angular.json | 1351 ---------------- bench/ann/conf/mnist-784-euclidean.json | 1351 ---------------- bench/ann/conf/nytimes-256-angular.json | 1351 ---------------- bench/ann/conf/sift-128-euclidean.json | 498 ------ bench/ann/data_export.py | 80 - bench/ann/get_dataset.py | 93 -- bench/ann/plot.py | 240 --- bench/ann/run.py | 234 --- bench/ann/split_groundtruth.py | 47 - .../raft-ann-bench/data_export/__main__.py | 7 +- .../raft-ann-bench/get_dataset/__main__.py | 12 +- .../raft-ann-bench/run/__main__.py | 18 +- .../split_groundtruth/__main__.py | 5 +- 23 files changed, 27 insertions(+), 13304 deletions(-) delete mode 100644 bench/ann/algos.yaml delete mode 100644 bench/ann/conf/bigann-100M.json delete mode 100644 bench/ann/conf/deep-100M.json delete mode 100644 bench/ann/conf/deep-1B.json delete mode 100644 bench/ann/conf/deep-image-96-angular.json delete mode 100644 bench/ann/conf/fashion-mnist-784-euclidean.json delete mode 100644 bench/ann/conf/gist-960-euclidean.json delete mode 100644 bench/ann/conf/glove-100-angular.json delete mode 100644 bench/ann/conf/glove-100-inner.json delete mode 100644 bench/ann/conf/glove-50-angular.json delete mode 100644 bench/ann/conf/lastfm-65-angular.json delete mode 100644 bench/ann/conf/mnist-784-euclidean.json delete mode 100644 bench/ann/conf/nytimes-256-angular.json delete mode 100644 bench/ann/conf/sift-128-euclidean.json delete mode 100644 bench/ann/data_export.py delete mode 100644 bench/ann/get_dataset.py delete mode 100644 bench/ann/plot.py delete mode 100644 bench/ann/run.py delete mode 100644 bench/ann/split_groundtruth.py diff --git a/bench/ann/algos.yaml b/bench/ann/algos.yaml deleted file mode 100644 index 5f554fc46b..0000000000 --- a/bench/ann/algos.yaml +++ /dev/null @@ -1,30 +0,0 @@ -faiss_gpu_ivf_flat: - executable: FAISS_IVF_FLAT_ANN_BENCH - disabled: false -faiss_gpu_flat: - executable: FAISS_IVF_FLAT_ANN_BENCH - disabled: false -faiss_gpu_ivf_pq: - executable: FAISS_IVF_PQ_ANN_BENCH - disabled: false -faiss_gpu_ivf_sq: - executable: FAISS_IVF_PQ_ANN_BENCH - disabled: false -faiss_gpu_bfknn: - executable: FAISS_BFKNN_ANN_BENCH - disabled: false -raft_ivf_flat: - executable: RAFT_IVF_FLAT_ANN_BENCH - disabled: false -raft_ivf_pq: - executable: RAFT_IVF_PQ_ANN_BENCH - disabled: false -raft_cagra: - executable: RAFT_CAGRA_ANN_BENCH - disabled: false -ggnn: - executable: GGNN_ANN_BENCH - disabled: false -hnswlib: - executable: HNSWLIB_ANN_BENCH - disabled: false \ No newline at end of file diff --git a/bench/ann/conf/bigann-100M.json b/bench/ann/conf/bigann-100M.json deleted file mode 100644 index c691c68299..0000000000 --- a/bench/ann/conf/bigann-100M.json +++ /dev/null @@ -1,196 +0,0 @@ -{ - "dataset": { - "name": "bigann-100M", - "base_file": "bigann-1B/base.1B.u8bin", - "subset_size": 
100000000, - "query_file": "bigann-1B/query.public.10K.u8bin", - "groundtruth_neighbors_file": "bigann-100M/groundtruth.neighbors.ibin", - "distance": "euclidean" - }, - - "search_basic_param": { - "batch_size": 10000, - "k": 10 - }, - - "index": [ - { - "name": "raft_ivf_pq.dimpq64-cluster5K", - "algo": "raft_ivf_pq", - "build_param": {"niter": 25, "nlist": 5000, "pq_dim": 64, "ratio": 10}, - "file": "bigann-100M/raft_ivf_pq/dimpq64-cluster5K", - "dataset_memtype": "host", - "search_params": [ - { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 1000, "internalDistanceDtype": "half", "smemLutDtype": "half" } - ] - }, - { - "name": "raft_ivf_pq.dimpq64-cluster10K", - "algo": "raft_ivf_pq", - "build_param": {"niter": 25, "nlist": 10000, "pq_dim": 64, "ratio": 10}, - "file": "bigann-100M/raft_ivf_pq/dimpq64-cluster5K", - "search_params": [ - { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" 
}, - { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 1000, "internalDistanceDtype": "half", "smemLutDtype": "half" } - ] - }, - { - "name": "hnswlib.M12", - "algo": "hnswlib", - "build_param": {"M":12, "efConstruction":500, "numThreads":32}, - "file": "bigann-100M/hnswlib/M12", - "search_params": [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ] - }, - { - "name": "hnswlib.M16", - "algo": "hnswlib", - "build_param": {"M":16, "efConstruction":500, "numThreads":32}, - "file": "bigann-100M/hnswlib/M16", - "search_params": [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ] - }, - { - "name": "hnswlib.M24", - "algo": "hnswlib", - "build_param": {"M":24, "efConstruction":500, "numThreads":32}, - "file": "bigann-100M/hnswlib/M24", - "search_params": [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ] - }, - { - "name": "hnswlib.M36", - "algo": "hnswlib", - "build_param": {"M":36, "efConstruction":500, "numThreads":32}, - "file": "bigann-100M/hnswlib/M36", - "search_params": [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ] - }, - { - "name": "raft_ivf_flat.nlist100K", - "algo": "raft_ivf_flat", - "build_param": {"nlist": 100000, "niter": 25, "ratio": 5}, - "dataset_memtype":"host", - "file": "bigann-100M/raft_ivf_flat/nlist100K", - "search_params": [ - {"max_batch":10000, "max_k":10, "nprobe":20}, - {"max_batch":10000, "max_k":10, "nprobe":30}, - {"max_batch":10000, "max_k":10, "nprobe":40}, - {"max_batch":10000, "max_k":10, "nprobe":50}, - {"max_batch":10000, "max_k":10, "nprobe":100}, - {"max_batch":10000, "max_k":10, "nprobe":200}, - {"max_batch":10000, "max_k":10, "nprobe":500}, - {"max_batch":10000, "max_k":10, "nprobe":1000} - ] - }, - { - "name": "raft_cagra.dim32", - "algo": "raft_cagra", - 
"dataset_memtype": "host", - "build_param": {"index_dim": 32}, - "file": "bigann-100M/raft_cagra/dim32", - "search_params": [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ] - }, - { - "name": "raft_cagra.dim64", - "algo": "raft_cagra", - "dataset_memtype":"host", - "build_param": {"index_dim": 64}, - "file": "bigann-100M/raft_cagra/dim64", - "search_params": [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ] - } - ] -} diff --git a/bench/ann/conf/deep-100M.json b/bench/ann/conf/deep-100M.json deleted file mode 100644 index 6591957961..0000000000 --- a/bench/ann/conf/deep-100M.json +++ /dev/null @@ -1,286 +0,0 @@ -{ - "dataset": { - "name": "deep-100M", - "base_file": "data/deep-1B/base.1B.fbin", - "subset_size": 100000000, - "query_file": "data/deep-1B/query.public.10K.fbin", - "groundtruth_neighbors_file": "deep-100M/groundtruth.neighbors.ibin", - "distance": "euclidean" - }, - - "search_basic_param": { - "batch_size": 10000, - "k": 10 - }, - - "index": [ - { - "name": "hnswlib.M12", - "algo": "hnswlib", - "build_param": {"M":12, "efConstruction":500, "numThreads":32}, - "file": "deep-100M/hnswlib/M12", - "search_params": [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ] - }, - { - "name": "hnswlib.M16", - "algo": "hnswlib", - "build_param": {"M":16, "efConstruction":500, "numThreads":32}, - "file": "deep-100M/hnswlib/M16", - "search_params": [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ] - }, - { - "name": "hnswlib.M24", - "algo": "hnswlib", - "build_param": {"M":24, "efConstruction":500, "numThreads":32}, - "file": "deep-100M/hnswlib/M24", - "search_params": [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ] - }, - { - "name": "hnswlib.M36", - "algo": "hnswlib", - "build_param": {"M":36, "efConstruction":500, "numThreads":32}, - "file": "deep-100M/hnswlib/M36", - "search_params": [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ] - }, - { - "name": "faiss_ivf_flat.nlist50K", - "algo": "faiss_gpu_ivf_flat", - "build_param": {"nlist":50000}, - "file": "deep-100M/faiss_ivf_flat/nlist50K", - "search_params": [ - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_flat.nlist100K", - "algo": "faiss_gpu_ivf_flat", - "build_param": {"nlist":100000}, - "file": "deep-100M/faiss_ivf_flat/nlist100K", - "search_params": [ - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": 
"faiss_ivf_flat.nlist200K", - "algo": "faiss_gpu_ivf_flat", - "build_param": {"nlist":200000}, - "file": "deep-100M/faiss_ivf_flat/nlist200K", - "search_params": [ - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_pq.M48-nlist16K", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":16384, "M":48}, - "file": "deep-100M/faiss_ivf_pq/M48-nlist16K", - "search_params": [ - {"nprobe":10}, - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500} - ] - }, - { - "name": "faiss_ivf_pq.M48-nlist50K", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":50000, "M":48}, - "file": "deep-100M/faiss_ivf_pq/M48-nlist50K", - "search_params": [ - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_pq.M48-nlist100K", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":100000, "M":48}, - "file": "deep-100M/faiss_ivf_pq/M48-nlist100K", - "search_params": [ - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "raft_ivf_flat.nlist100K", - "algo": "raft_ivf_flat", - "dataset_memtype":"host", - "build_param": {"nlist": 100000, "niter": 25, "ratio": 5}, - "file": "deep-100M/raft_ivf_flat/nlist100K", - "search_params": [ - {"max_batch":10000, "max_k":10, "nprobe":20}, - {"max_batch":10000, "max_k":10, "nprobe":30}, - {"max_batch":10000, "max_k":10, "nprobe":40}, - {"max_batch":10000, "max_k":10, "nprobe":50}, - {"max_batch":10000, "max_k":10, "nprobe":100}, - {"max_batch":10000, "max_k":10, "nprobe":200}, - {"max_batch":10000, "max_k":10, "nprobe":500}, - {"max_batch":10000, "max_k":10, "nprobe":1000} - ] - }, - { - "name": "raft_cagra.dim32", - "algo": "raft_cagra", - "dataset_memtype":"host", - "build_param": {"index_dim": 32, "intermediate_graph_degree": 48}, - "file": "deep-100M/raft_cagra/dim32", - "search_params": [ - {"itopk": 32, "search_width": 1, "max_iterations": 0, "algo": "single_cta"}, - {"itopk": 32, "search_width": 1, "max_iterations": 32, "algo": "single_cta"}, - {"itopk": 64, "search_width": 4, "max_iterations": 16, "algo": "single_cta"}, - {"itopk": 64, "search_width": 1, "max_iterations": 64, "algo": "single_cta"}, - {"itopk": 96, "search_width": 2, "max_iterations": 48, "algo": "single_cta"}, - {"itopk": 128, "search_width": 8, "max_iterations": 16, "algo": "single_cta"}, - {"itopk": 128, "search_width": 2, "max_iterations": 64, "algo": "single_cta"}, - {"itopk": 192, "search_width": 8, "max_iterations": 24, "algo": "single_cta"}, - {"itopk": 192, "search_width": 2, "max_iterations": 96, "algo": "single_cta"}, - {"itopk": 256, "search_width": 8, "max_iterations": 32, "algo": "single_cta"}, - {"itopk": 384, "search_width": 8, "max_iterations": 48, "algo": "single_cta"}, - {"itopk": 512, "search_width": 8, "max_iterations": 64, "algo": "single_cta"} - ] - }, - { - "name": "raft_cagra.dim32.multi_cta", - "algo": "raft_cagra", - "dataset_memtype":"host", - "build_param": {"index_dim": 32, "intermediate_graph_degree": 48}, - "file": "deep-100M/raft_cagra/dim32", - "search_params": [ - {"itopk": 32, "search_width": 1, "max_iterations": 0, "algo": "multi_cta"}, - {"itopk": 32, "search_width": 1, "max_iterations": 32, "algo": "multi_cta"}, - {"itopk": 64, "search_width": 4, 
"max_iterations": 16, "algo": "multi_cta"}, - {"itopk": 64, "search_width": 1, "max_iterations": 64, "algo": "multi_cta"}, - {"itopk": 96, "search_width": 2, "max_iterations": 48, "algo": "multi_cta"}, - {"itopk": 128, "search_width": 8, "max_iterations": 16, "algo": "multi_cta"}, - {"itopk": 128, "search_width": 2, "max_iterations": 64, "algo": "multi_cta"}, - {"itopk": 192, "search_width": 8, "max_iterations": 24, "algo": "multi_cta"}, - {"itopk": 192, "search_width": 2, "max_iterations": 96, "algo": "multi_cta"}, - {"itopk": 256, "search_width": 8, "max_iterations": 32, "algo": "multi_cta"}, - {"itopk": 384, "search_width": 8, "max_iterations": 48, "algo": "multi_cta"}, - {"itopk": 512, "search_width": 8, "max_iterations": 64, "algo": "multi_cta"} - ] - { - "name": "raft_cagra.dim32.multi_kernel", - "algo": "raft_cagra", - "dataset_memtype":"host", - "build_param": {"index_dim": 32, "intermediate_graph_degree": 48}, - "file": "deep-100M/raft_cagra/dim32", - "search_params": [ - {"itopk": 32, "search_width": 1, "max_iterations": 0, "algo": "multi_kernel"}, - {"itopk": 32, "search_width": 1, "max_iterations": 32, "algo": "multi_kernel"}, - {"itopk": 64, "search_width": 4, "max_iterations": 16, "algo": "multi_kernel"}, - {"itopk": 64, "search_width": 1, "max_iterations": 64, "algo": "multi_kernel"}, - {"itopk": 96, "search_width": 2, "max_iterations": 48, "algo": "multi_kernel"}, - {"itopk": 128, "search_width": 8, "max_iterations": 16, "algo": "multi_kernel"}, - {"itopk": 128, "search_width": 2, "max_iterations": 64, "algo": "multi_kernel"}, - {"itopk": 192, "search_width": 8, "max_iterations": 24, "algo": "multi_kernel"}, - {"itopk": 192, "search_width": 2, "max_iterations": 96, "algo": "multi_kernel"}, - {"itopk": 256, "search_width": 8, "max_iterations": 32, "algo": "multi_kernel"}, - {"itopk": 384, "search_width": 8, "max_iterations": 48, "algo": "multi_kernel"}, - {"itopk": 512, "search_width": 8, "max_iterations": 64, "algo": "multi_kernel"} - ] - }, - { - "name": "raft_cagra.dim64", - "algo": "raft_cagra", - "dataset_memtype":"host", - "build_param": {"index_dim": 64}, - "file": "deep-100M/raft_cagra/dim64", - "search_params": [ - {"itopk": 32, "search_width": 1, "max_iterations": 0}, - {"itopk": 32, "search_width": 1, "max_iterations": 32}, - {"itopk": 64, "search_width": 4, "max_iterations": 16}, - {"itopk": 64, "search_width": 1, "max_iterations": 64}, - {"itopk": 96, "search_width": 2, "max_iterations": 48}, - {"itopk": 128, "search_width": 8, "max_iterations": 16}, - {"itopk": 128, "search_width": 2, "max_iterations": 64}, - {"itopk": 192, "search_width": 8, "max_iterations": 24}, - {"itopk": 192, "search_width": 2, "max_iterations": 96}, - {"itopk": 256, "search_width": 8, "max_iterations": 32}, - {"itopk": 384, "search_width": 8, "max_iterations": 48}, - {"itopk": 512, "search_width": 8, "max_iterations": 64} - ] - } - ] -} diff --git a/bench/ann/conf/deep-1B.json b/bench/ann/conf/deep-1B.json deleted file mode 100644 index 632d2f7308..0000000000 --- a/bench/ann/conf/deep-1B.json +++ /dev/null @@ -1,34 +0,0 @@ -{ - "dataset": { - "name": "deep-1B", - "base_file": "deep-1B/base.1B.fbin", - "query_file": "deep-1B/query.public.10K.fbin", - "groundtruth_neighbors_file": "deep-1B/groundtruth.neighbors.ibin", - "distance": "inner_product" - }, - - "search_basic_param": { - "batch_size": 10000, - "k": 10 - }, - - "index": [ - { - "name": "faiss_ivf_pq.M48-nlist50K", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":50000, "M":48}, - "file": 
"deep-1B/faiss_ivf_pq/M48-nlist50K", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000}, - {"nprobe":2000} - ] - } - ] -} diff --git a/bench/ann/conf/deep-image-96-angular.json b/bench/ann/conf/deep-image-96-angular.json deleted file mode 100644 index 4467e09dab..0000000000 --- a/bench/ann/conf/deep-image-96-angular.json +++ /dev/null @@ -1,1366 +0,0 @@ -{ - "dataset": { - "name": "deep-image-96-angular", - "base_file": "data/deep-image-96-angular/base.fbin", - "query_file": "data/deep-image-96-angular/query.fbin", - "distance": "euclidean" - }, - "search_basic_param": { - "batch_size": 5000, - "k": 10, - "run_count": 3 - }, - "index": [ - { - "name" : "hnswlib.M12", - "algo" : "hnswlib", - "build_param": {"M":12, "efConstruction":500, "numThreads":32}, - "file" : "index/deep-image-96-angular/hnswlib/M12", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/deep-image-96-angular/hnswlib/M12" - }, - { - "name" : "hnswlib.M16", - "algo" : "hnswlib", - "build_param": {"M":16, "efConstruction":500, "numThreads":32}, - "file" : "index/deep-image-96-angular/hnswlib/M16", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/deep-image-96-angular/hnswlib/M16" - }, - { - "name" : "hnswlib.M24", - "algo" : "hnswlib", - "build_param": {"M":24, "efConstruction":500, "numThreads":32}, - "file" : "index/deep-image-96-angular/hnswlib/M24", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/deep-image-96-angular/hnswlib/M24" - }, - { - "name" : "hnswlib.M36", - "algo" : "hnswlib", - "build_param": {"M":36, "efConstruction":500, "numThreads":32}, - "file" : "index/deep-image-96-angular/hnswlib/M36", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/deep-image-96-angular/hnswlib/M36" - }, - - - - - { - "name": "raft_bfknn", - "algo": "raft_bfknn", - "dataset_memtype": "device", - "build_param": {}, - "file": "index/deep-image-96-angular/raft_bfknn/bfknn", - "search_params": [ - { - "probe": 1 - } - ], - "search_result_file": "result/deep-image-96-angular/raft_bfknn/bfknn" - }, - { - "name": "faiss_ivf_flat.nlist1024", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 1024 - }, - "file": "index/deep-image-96-angular/faiss_ivf_flat/nlist1024", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 
- }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_flat/nlist1024" - }, - { - "name": "faiss_ivf_flat.nlist2048", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 2048 - }, - "file": "index/deep-image-96-angular/faiss_ivf_flat/nlist2048", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_flat/nlist2048" - }, - { - "name": "faiss_ivf_flat.nlist4096", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 4096 - }, - "file": "index/deep-image-96-angular/faiss_ivf_flat/nlist4096", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_flat/nlist4096" - }, - { - "name": "faiss_ivf_flat.nlist8192", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 8192 - }, - "file": "index/deep-image-96-angular/faiss_ivf_flat/nlist8192", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_flat/nlist8192" - }, - { - "name": "faiss_ivf_flat.nlist16384", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 16384 - }, - "file": "index/deep-image-96-angular/faiss_ivf_flat/nlist16384", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_flat/nlist16384" - }, - { - "name": "faiss_ivf_pq.M64-nlist1024", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": true - }, - "file": "index/deep-image-96-angular/faiss_ivf_pq/M64-nlist1024", - "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_pq/M64-nlist1024" - }, - { - "name": "faiss_ivf_pq.M64-nlist1024.noprecomp", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": false - }, - "file": "index/deep-image-96-angular/faiss_ivf_pq/M64-nlist1024.noprecomp", - "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_pq/M64-nlist1024" - }, - { - "name": "faiss_ivf_sq.nlist1024-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 1024, - "quantizer_type": "fp16" - }, - "file": "index/deep-image-96-angular/faiss_ivf_sq/nlist1024-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - 
"nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_sq/nlist1024-fp16" - }, - { - "name": "faiss_ivf_sq.nlist2048-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 2048, - "quantizer_type": "fp16" - }, - "file": "index/deep-image-96-angular/faiss_ivf_sq/nlist2048-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_sq/nlist2048-fp16" - }, - { - "name": "faiss_ivf_sq.nlist4096-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 4096, - "quantizer_type": "fp16" - }, - "file": "index/deep-image-96-angular/faiss_ivf_sq/nlist4096-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_sq/nlist4096-fp16" - }, - { - "name": "faiss_ivf_sq.nlist8192-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 8192, - "quantizer_type": "fp16" - }, - "file": "index/deep-image-96-angular/faiss_ivf_sq/nlist8192-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_sq/nlist8192-fp16" - }, - { - "name": "faiss_ivf_sq.nlist16384-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 16384, - "quantizer_type": "fp16" - }, - "file": "index/deep-image-96-angular/faiss_ivf_sq/nlist16384-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_sq/nlist16384-fp16" - }, - { - "name": "faiss_ivf_sq.nlist1024-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 1024, - "quantizer_type": "int8" - }, - "file": "index/deep-image-96-angular/faiss_ivf_sq/nlist1024-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_sq/nlist1024-int8" - }, - { - "name": "faiss_ivf_sq.nlist2048-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 2048, - "quantizer_type": "int8" - }, - "file": "index/deep-image-96-angular/faiss_ivf_sq/nlist2048-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_sq/nlist2048-int8" - }, - { - "name": "faiss_ivf_sq.nlist4096-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 4096, - "quantizer_type": "int8" - }, - "file": 
"index/deep-image-96-angular/faiss_ivf_sq/nlist4096-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_sq/nlist4096-int8" - }, - { - "name": "faiss_ivf_sq.nlist8192-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 8192, - "quantizer_type": "int8" - }, - "file": "index/deep-image-96-angular/faiss_ivf_sq/nlist8192-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_sq/nlist8192-int8" - }, - { - "name": "faiss_ivf_sq.nlist16384-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 16384, - "quantizer_type": "int8" - }, - "file": "index/deep-image-96-angular/faiss_ivf_sq/nlist16384-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_sq/nlist16384-int8" - }, - { - "name": "faiss_flat", - "algo": "faiss_gpu_flat", - "build_param": {}, - "file": "index/deep-image-96-angular/faiss_flat/flat", - "search_params": [ - {} - ], - "search_result_file": "result/deep-image-96-angular/faiss_flat/flat" - }, - - { - "name": "raft_ivf_pq.dimpq128-cluster1024", - "algo": "raft_ivf_pq", - "dataset_memtype": "device", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", - "algo": "raft_ivf_pq", - "dataset_memtype": "device", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-float-float", - "search_params": [ - { - "k": 10, - "numProbes": 1, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 5, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - 
"smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-float-float" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", - "algo": "raft_ivf_pq", - "dataset_memtype": "device", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-float-half", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "dataset_memtype": "device", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "dataset_memtype": "device", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", - 
"algo": "raft_ivf_pq", - "dataset_memtype": "device", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq64-cluster1024-float-half", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq64-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "dataset_memtype": "device", - "build_param": { - "nlist": 1024, - "pq_dim": 32, - "ratio": 1, - "niter": 25 - }, - "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "dataset_memtype": "device", - "build_param": { - "nlist": 1024, - "pq_dim": 16, - "ratio": 1, - "niter": 25 - }, - "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", - "algo": "raft_ivf_pq", - "dataset_memtype": "device", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-half-float", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "half", - 
"smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-half-float" - }, - { - "name": "raft_ivf_pq.dimpq512-cluster1024-float-float", - "algo": "raft_ivf_pq", - "dataset_memtype": "device", - "build_param": { - "nlist": 1024, - "pq_dim": 512, - "ratio": 1, - "niter": 25 - }, - "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq512-cluster1024-float-float", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq512-cluster1024-float-float" - }, - { - "name": "raft_ivf_flat.nlist1024", - "algo": "raft_ivf_flat", - "dataset_memtype": "device", - "build_param": { - "nlist": 1024, - "ratio": 1, - "niter": 25 - }, - "file": "index/deep-image-96-angular/raft_ivf_flat/nlist1024", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_flat/nlist1024" - }, - { - "name": "raft_ivf_flat.nlist16384", - "algo": "raft_ivf_flat", - "dataset_memtype": "device", - "build_param": { - "nlist": 16384, - "ratio": 2, - "niter": 20 - }, - "file": "index/deep-image-96-angular/raft_ivf_flat/nlist16384", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_flat/nlist16384" - }, - - { - "name" : "raft_cagra.dim32", - "algo" : "raft_cagra", - "dataset_memtype": "device", - "build_param": { - "index_dim" : 32 - }, - "file" : "index/deep-image-96-angular/raft_cagra/dim32", - "search_params" : [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ], - "search_result_file" : "result/deep-image-96-angular/raft_cagra/dim32" - }, - - { - "name" : "raft_cagra.dim64", - "algo" : "raft_cagra", - "dataset_memtype": "device", - "build_param": { - "index_dim" : 64 - }, - "file" : "index/deep-image-96-angular/raft_cagra/dim64", - "search_params" : [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ], - "search_result_file" : "result/deep-image-96-angular/raft_cagra/dim64" - } - ] -} diff --git a/bench/ann/conf/fashion-mnist-784-euclidean.json b/bench/ann/conf/fashion-mnist-784-euclidean.json deleted file mode 100644 index 5a0713ca0b..0000000000 --- a/bench/ann/conf/fashion-mnist-784-euclidean.json +++ /dev/null @@ -1,1366 +0,0 @@ -{ - "dataset": { 
- "name": "fashion-mnist-784-euclidean", - "base_file": "data/fashion-mnist-784-euclidean/base.fbin", - "query_file": "data/fashion-mnist-784-euclidean/query.fbin", - "distance": "euclidean" - }, - "search_basic_param": { - "batch_size": 5000, - "k": 10, - "run_count": 3 - }, - "index": [ - { - "name" : "hnswlib.M12", - "algo" : "hnswlib", - "build_param": {"M":12, "efConstruction":500, "numThreads":32}, - "file" : "index/fashion-mnist-784-euclidean/hnswlib/M12", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/fashion-mnist-784-euclidean/hnswlib/M12" - }, - { - "name" : "hnswlib.M16", - "algo" : "hnswlib", - "build_param": {"M":16, "efConstruction":500, "numThreads":32}, - "file" : "index/fashion-mnist-784-euclidean/hnswlib/M16", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/fashion-mnist-784-euclidean/hnswlib/M16" - }, - { - "name" : "hnswlib.M24", - "algo" : "hnswlib", - "build_param": {"M":24, "efConstruction":500, "numThreads":32}, - "file" : "index/fashion-mnist-784-euclidean/hnswlib/M24", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/fashion-mnist-784-euclidean/hnswlib/M24" - }, - { - "name" : "hnswlib.M36", - "algo" : "hnswlib", - "build_param": {"M":36, "efConstruction":500, "numThreads":32}, - "file" : "index/fashion-mnist-784-euclidean/hnswlib/M36", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/fashion-mnist-784-euclidean/hnswlib/M36" - }, - - - - - { - "name": "raft_bfknn", - "algo": "raft_bfknn", - "dataset_memtype": "device", - "build_param": {}, - "file": "index/fashion-mnist-784-euclidean/raft_bfknn/bfknn", - "search_params": [ - { - "probe": 1 - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/raft_bfknn/bfknn" - }, - { - "name": "faiss_ivf_flat.nlist1024", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 1024 - }, - "file": "index/fashion-mnist-784-euclidean/faiss_ivf_flat/nlist1024", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_flat/nlist1024" - }, - { - "name": "faiss_ivf_flat.nlist2048", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 2048 - }, - "file": 
"index/fashion-mnist-784-euclidean/faiss_ivf_flat/nlist2048", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_flat/nlist2048" - }, - { - "name": "faiss_ivf_flat.nlist4096", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 4096 - }, - "file": "index/fashion-mnist-784-euclidean/faiss_ivf_flat/nlist4096", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_flat/nlist4096" - }, - { - "name": "faiss_ivf_flat.nlist8192", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 8192 - }, - "file": "index/fashion-mnist-784-euclidean/faiss_ivf_flat/nlist8192", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_flat/nlist8192" - }, - { - "name": "faiss_ivf_flat.nlist16384", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 16384 - }, - "file": "index/fashion-mnist-784-euclidean/faiss_ivf_flat/nlist16384", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_flat/nlist16384" - }, - { - "name": "faiss_ivf_pq.M64-nlist1024", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": true - }, - "file": "index/fashion-mnist-784-euclidean/faiss_ivf_pq/M64-nlist1024", - "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_pq/M64-nlist1024" - }, - { - "name": "faiss_ivf_pq.M64-nlist1024.noprecomp", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": false - }, - "file": "index/fashion-mnist-784-euclidean/faiss_ivf_pq/M64-nlist1024.noprecomp", - "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_pq/M64-nlist1024" - }, - { - "name": "faiss_ivf_sq.nlist1024-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 1024, - "quantizer_type": "fp16" - }, - "file": "index/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist1024-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist1024-fp16" - }, - { - "name": "faiss_ivf_sq.nlist2048-fp16", - "algo": "faiss_gpu_ivf_sq", - 
"build_param": { - "nlist": 2048, - "quantizer_type": "fp16" - }, - "file": "index/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist2048-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist2048-fp16" - }, - { - "name": "faiss_ivf_sq.nlist4096-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 4096, - "quantizer_type": "fp16" - }, - "file": "index/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist4096-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist4096-fp16" - }, - { - "name": "faiss_ivf_sq.nlist8192-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 8192, - "quantizer_type": "fp16" - }, - "file": "index/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist8192-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist8192-fp16" - }, - { - "name": "faiss_ivf_sq.nlist16384-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 16384, - "quantizer_type": "fp16" - }, - "file": "index/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist16384-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist16384-fp16" - }, - { - "name": "faiss_ivf_sq.nlist1024-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 1024, - "quantizer_type": "int8" - }, - "file": "index/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist1024-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist1024-int8" - }, - { - "name": "faiss_ivf_sq.nlist2048-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 2048, - "quantizer_type": "int8" - }, - "file": "index/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist2048-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist2048-int8" - }, - { - "name": "faiss_ivf_sq.nlist4096-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 4096, - "quantizer_type": "int8" - }, - "file": "index/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist4096-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - 
{ - "nprobe": 1000 - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist4096-int8" - }, - { - "name": "faiss_ivf_sq.nlist8192-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 8192, - "quantizer_type": "int8" - }, - "file": "index/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist8192-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist8192-int8" - }, - { - "name": "faiss_ivf_sq.nlist16384-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 16384, - "quantizer_type": "int8" - }, - "file": "index/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist16384-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist16384-int8" - }, - { - "name": "faiss_flat", - "algo": "faiss_gpu_flat", - "build_param": {}, - "file": "index/fashion-mnist-784-euclidean/faiss_flat/flat", - "search_params": [ - {} - ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_flat/flat" - }, - - { - "name": "raft_ivf_pq.dimpq128-cluster1024", - "algo": "raft_ivf_pq", - "dataset_memtype": "device", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", - "algo": "raft_ivf_pq", - "dataset_memtype": "device", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-float", - "search_params": [ - { - "k": 10, - "numProbes": 1, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 5, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": 
"float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-float" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", - "algo": "raft_ivf_pq", - "dataset_memtype": "device", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-half", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "dataset_memtype": "device", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "dataset_memtype": "device", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", - "algo": "raft_ivf_pq", - "dataset_memtype": "device", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": 
"index/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-half", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "dataset_memtype": "device", - "build_param": { - "nlist": 1024, - "pq_dim": 32, - "ratio": 1, - "niter": 25 - }, - "file": "index/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq32-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq32-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "dataset_memtype": "device", - "build_param": { - "nlist": 1024, - "pq_dim": 16, - "ratio": 1, - "niter": 25 - }, - "file": "index/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq16-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq16-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", - "algo": "raft_ivf_pq", - "dataset_memtype": "device", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-half-float", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "half", - 
"smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-half-float" - }, - { - "name": "raft_ivf_pq.dimpq512-cluster1024-float-float", - "algo": "raft_ivf_pq", - "dataset_memtype": "device", - "build_param": { - "nlist": 1024, - "pq_dim": 512, - "ratio": 1, - "niter": 25 - }, - "file": "index/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq512-cluster1024-float-float", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/raft_ivf_pq/dimpq512-cluster1024-float-float" - }, - { - "name": "raft_ivf_flat.nlist1024", - "algo": "raft_ivf_flat", - "dataset_memtype": "device", - "build_param": { - "nlist": 1024, - "ratio": 1, - "niter": 25 - }, - "file": "index/fashion-mnist-784-euclidean/raft_ivf_flat/nlist1024", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/raft_ivf_flat/nlist1024" - }, - { - "name": "raft_ivf_flat.nlist16384", - "algo": "raft_ivf_flat", - "dataset_memtype": "device", - "build_param": { - "nlist": 16384, - "ratio": 2, - "niter": 20 - }, - "file": "index/fashion-mnist-784-euclidean/raft_ivf_flat/nlist16384", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/fashion-mnist-784-euclidean/raft_ivf_flat/nlist16384" - }, - - { - "name" : "raft_cagra.dim32", - "algo" : "raft_cagra", - "dataset_memtype": "device", - "build_param": { - "index_dim" : 32 - }, - "file" : "index/fashion-mnist-784-euclidean/raft_cagra/dim32", - "search_params" : [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ], - "search_result_file" : "result/fashion-mnist-784-euclidean/raft_cagra/dim32" - }, - - { - "name" : "raft_cagra.dim64", - "algo" : "raft_cagra", - "dataset_memtype": "device", - "build_param": { - "index_dim" : 64 - }, - "file" : "index/fashion-mnist-784-euclidean/raft_cagra/dim64", - "search_params" : [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ], - "search_result_file" : "result/fashion-mnist-784-euclidean/raft_cagra/dim64" - } - ] -} diff --git a/bench/ann/conf/gist-960-euclidean.json b/bench/ann/conf/gist-960-euclidean.json deleted file mode 100644 index d03df0f486..0000000000 --- a/bench/ann/conf/gist-960-euclidean.json +++ /dev/null @@ -1,1351 +0,0 @@ -{ - "dataset": { - "name": "gist-960-euclidean", - "base_file": 
"data/gist-960-euclidean/base.fbin", - "query_file": "data/gist-960-euclidean/query.fbin", - "distance": "euclidean" - }, - "search_basic_param": { - "batch_size": 5000, - "k": 10, - "run_count": 3 - }, - "index": [ - { - "name" : "hnswlib.M12", - "algo" : "hnswlib", - "build_param": {"M":12, "efConstruction":500, "numThreads":32}, - "file" : "index/gist-960-euclidean/hnswlib/M12", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/gist-960-euclidean/hnswlib/M12" - }, - { - "name" : "hnswlib.M16", - "algo" : "hnswlib", - "build_param": {"M":16, "efConstruction":500, "numThreads":32}, - "file" : "index/gist-960-euclidean/hnswlib/M16", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/gist-960-euclidean/hnswlib/M16" - }, - { - "name" : "hnswlib.M24", - "algo" : "hnswlib", - "build_param": {"M":24, "efConstruction":500, "numThreads":32}, - "file" : "index/gist-960-euclidean/hnswlib/M24", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/gist-960-euclidean/hnswlib/M24" - }, - { - "name" : "hnswlib.M36", - "algo" : "hnswlib", - "build_param": {"M":36, "efConstruction":500, "numThreads":32}, - "file" : "index/gist-960-euclidean/hnswlib/M36", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/gist-960-euclidean/hnswlib/M36" - }, - - - - - { - "name": "raft_bfknn", - "algo": "raft_bfknn", - "build_param": {}, - "file": "index/gist-960-euclidean/raft_bfknn/bfknn", - "search_params": [ - { - "probe": 1 - } - ], - "search_result_file": "result/gist-960-euclidean/raft_bfknn/bfknn" - }, - { - "name": "faiss_ivf_flat.nlist1024", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 1024 - }, - "file": "index/gist-960-euclidean/faiss_ivf_flat/nlist1024", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/gist-960-euclidean/faiss_ivf_flat/nlist1024" - }, - { - "name": "faiss_ivf_flat.nlist2048", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 2048 - }, - "file": "index/gist-960-euclidean/faiss_ivf_flat/nlist2048", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 
- }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/gist-960-euclidean/faiss_ivf_flat/nlist2048" - }, - { - "name": "faiss_ivf_flat.nlist4096", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 4096 - }, - "file": "index/gist-960-euclidean/faiss_ivf_flat/nlist4096", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/gist-960-euclidean/faiss_ivf_flat/nlist4096" - }, - { - "name": "faiss_ivf_flat.nlist8192", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 8192 - }, - "file": "index/gist-960-euclidean/faiss_ivf_flat/nlist8192", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/gist-960-euclidean/faiss_ivf_flat/nlist8192" - }, - { - "name": "faiss_ivf_flat.nlist16384", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 16384 - }, - "file": "index/gist-960-euclidean/faiss_ivf_flat/nlist16384", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/gist-960-euclidean/faiss_ivf_flat/nlist16384" - }, - { - "name": "faiss_ivf_pq.M64-nlist1024", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": true - }, - "file": "index/gist-960-euclidean/faiss_ivf_pq/M64-nlist1024", - "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/gist-960-euclidean/faiss_ivf_pq/M64-nlist1024" - }, - { - "name": "faiss_ivf_pq.M64-nlist1024.noprecomp", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": false - }, - "file": "index/gist-960-euclidean/faiss_ivf_pq/M64-nlist1024.noprecomp", - "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/gist-960-euclidean/faiss_ivf_pq/M64-nlist1024" - }, - { - "name": "faiss_ivf_sq.nlist1024-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 1024, - "quantizer_type": "fp16" - }, - "file": "index/gist-960-euclidean/faiss_ivf_sq/nlist1024-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/gist-960-euclidean/faiss_ivf_sq/nlist1024-fp16" - }, - { - "name": "faiss_ivf_sq.nlist2048-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 2048, - "quantizer_type": "fp16" - }, - "file": "index/gist-960-euclidean/faiss_ivf_sq/nlist2048-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - 
"search_result_file": "result/gist-960-euclidean/faiss_ivf_sq/nlist2048-fp16" - }, - { - "name": "faiss_ivf_sq.nlist4096-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 4096, - "quantizer_type": "fp16" - }, - "file": "index/gist-960-euclidean/faiss_ivf_sq/nlist4096-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/gist-960-euclidean/faiss_ivf_sq/nlist4096-fp16" - }, - { - "name": "faiss_ivf_sq.nlist8192-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 8192, - "quantizer_type": "fp16" - }, - "file": "index/gist-960-euclidean/faiss_ivf_sq/nlist8192-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/gist-960-euclidean/faiss_ivf_sq/nlist8192-fp16" - }, - { - "name": "faiss_ivf_sq.nlist16384-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 16384, - "quantizer_type": "fp16" - }, - "file": "index/gist-960-euclidean/faiss_ivf_sq/nlist16384-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/gist-960-euclidean/faiss_ivf_sq/nlist16384-fp16" - }, - { - "name": "faiss_ivf_sq.nlist1024-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 1024, - "quantizer_type": "int8" - }, - "file": "index/gist-960-euclidean/faiss_ivf_sq/nlist1024-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/gist-960-euclidean/faiss_ivf_sq/nlist1024-int8" - }, - { - "name": "faiss_ivf_sq.nlist2048-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 2048, - "quantizer_type": "int8" - }, - "file": "index/gist-960-euclidean/faiss_ivf_sq/nlist2048-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/gist-960-euclidean/faiss_ivf_sq/nlist2048-int8" - }, - { - "name": "faiss_ivf_sq.nlist4096-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 4096, - "quantizer_type": "int8" - }, - "file": "index/gist-960-euclidean/faiss_ivf_sq/nlist4096-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/gist-960-euclidean/faiss_ivf_sq/nlist4096-int8" - }, - { - "name": "faiss_ivf_sq.nlist8192-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 8192, - "quantizer_type": "int8" - }, - "file": "index/gist-960-euclidean/faiss_ivf_sq/nlist8192-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - 
"nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/gist-960-euclidean/faiss_ivf_sq/nlist8192-int8" - }, - { - "name": "faiss_ivf_sq.nlist16384-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 16384, - "quantizer_type": "int8" - }, - "file": "index/gist-960-euclidean/faiss_ivf_sq/nlist16384-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/gist-960-euclidean/faiss_ivf_sq/nlist16384-int8" - }, - { - "name": "faiss_flat", - "algo": "faiss_gpu_flat", - "build_param": {}, - "file": "index/gist-960-euclidean/faiss_flat/flat", - "search_params": [ - {} - ], - "search_result_file": "result/gist-960-euclidean/faiss_flat/flat" - }, - - { - "name": "raft_ivf_pq.dimpq128-cluster1024", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/gist-960-euclidean/raft_ivf_pq/dimpq128-cluster1024", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/gist-960-euclidean/raft_ivf_pq/dimpq128-cluster1024" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/gist-960-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-float", - "search_params": [ - { - "k": 10, - "numProbes": 1, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 5, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/gist-960-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-float" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/gist-960-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-half", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, 
- { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/gist-960-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/gist-960-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/gist-960-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/gist-960-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/gist-960-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/gist-960-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-half", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/gist-960-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-half" - }, - { - "name": 
"raft_ivf_pq.dimpq32-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 32, - "ratio": 1, - "niter": 25 - }, - "file": "index/gist-960-euclidean/raft_ivf_pq/dimpq32-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/gist-960-euclidean/raft_ivf_pq/dimpq32-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 16, - "ratio": 1, - "niter": 25 - }, - "file": "index/gist-960-euclidean/raft_ivf_pq/dimpq16-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/gist-960-euclidean/raft_ivf_pq/dimpq16-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/gist-960-euclidean/raft_ivf_pq/dimpq128-cluster1024-half-float", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/gist-960-euclidean/raft_ivf_pq/dimpq128-cluster1024-half-float" - }, - { - "name": "raft_ivf_pq.dimpq512-cluster1024-float-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 512, - "ratio": 1, - "niter": 25 - }, - "file": "index/gist-960-euclidean/raft_ivf_pq/dimpq512-cluster1024-float-float", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - 
"internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/gist-960-euclidean/raft_ivf_pq/dimpq512-cluster1024-float-float" - }, - { - "name": "raft_ivf_flat.nlist1024", - "algo": "raft_ivf_flat", - "build_param": { - "nlist": 1024, - "ratio": 1, - "niter": 25 - }, - "file": "index/gist-960-euclidean/raft_ivf_flat/nlist1024", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/gist-960-euclidean/raft_ivf_flat/nlist1024" - }, - { - "name": "raft_ivf_flat.nlist16384", - "algo": "raft_ivf_flat", - "build_param": { - "nlist": 16384, - "ratio": 2, - "niter": 20 - }, - "file": "index/gist-960-euclidean/raft_ivf_flat/nlist16384", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/gist-960-euclidean/raft_ivf_flat/nlist16384" - }, - - { - "name" : "raft_cagra.dim32", - "algo" : "raft_cagra", - "build_param": { - "index_dim" : 32 - }, - "file" : "index/gist-960-euclidean/raft_cagra/dim32", - "search_params" : [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ], - "search_result_file" : "result/gist-960-euclidean/raft_cagra/dim32" - }, - - { - "name" : "raft_cagra.dim64", - "algo" : "raft_cagra", - "build_param": { - "index_dim" : 64 - }, - "file" : "index/gist-960-euclidean/raft_cagra/dim64", - "search_params" : [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ], - "search_result_file" : "result/gist-960-euclidean/raft_cagra/dim64" - } - ] -} diff --git a/bench/ann/conf/glove-100-angular.json b/bench/ann/conf/glove-100-angular.json deleted file mode 100644 index 1d3dc09988..0000000000 --- a/bench/ann/conf/glove-100-angular.json +++ /dev/null @@ -1,1351 +0,0 @@ -{ - "dataset": { - "name": "glove-100-angular", - "base_file": "data/glove-100-angular/base.fbin", - "query_file": "data/glove-100-angular/query.fbin", - "distance": "euclidean" - }, - "search_basic_param": { - "batch_size": 5000, - "k": 10, - "run_count": 3 - }, - "index": [ - { - "name" : "hnswlib.M12", - "algo" : "hnswlib", - "build_param": {"M":12, "efConstruction":500, "numThreads":32}, - "file" : "index/glove-100-angular/hnswlib/M12", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/glove-100-angular/hnswlib/M12" - }, - { - "name" : "hnswlib.M16", - "algo" : "hnswlib", - "build_param": {"M":16, "efConstruction":500, "numThreads":32}, - "file" : "index/glove-100-angular/hnswlib/M16", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - 
{"ef":800, "numThreads":1} - ], - "search_result_file" : "result/glove-100-angular/hnswlib/M16" - }, - { - "name" : "hnswlib.M24", - "algo" : "hnswlib", - "build_param": {"M":24, "efConstruction":500, "numThreads":32}, - "file" : "index/glove-100-angular/hnswlib/M24", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/glove-100-angular/hnswlib/M24" - }, - { - "name" : "hnswlib.M36", - "algo" : "hnswlib", - "build_param": {"M":36, "efConstruction":500, "numThreads":32}, - "file" : "index/glove-100-angular/hnswlib/M36", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/glove-100-angular/hnswlib/M36" - }, - - - - - { - "name": "raft_bfknn", - "algo": "raft_bfknn", - "build_param": {}, - "file": "index/glove-100-angular/raft_bfknn/bfknn", - "search_params": [ - { - "probe": 1 - } - ], - "search_result_file": "result/glove-100-angular/raft_bfknn/bfknn" - }, - { - "name": "faiss_ivf_flat.nlist1024", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 1024 - }, - "file": "index/glove-100-angular/faiss_ivf_flat/nlist1024", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-angular/faiss_ivf_flat/nlist1024" - }, - { - "name": "faiss_ivf_flat.nlist2048", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 2048 - }, - "file": "index/glove-100-angular/faiss_ivf_flat/nlist2048", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-angular/faiss_ivf_flat/nlist2048" - }, - { - "name": "faiss_ivf_flat.nlist4096", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 4096 - }, - "file": "index/glove-100-angular/faiss_ivf_flat/nlist4096", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-angular/faiss_ivf_flat/nlist4096" - }, - { - "name": "faiss_ivf_flat.nlist8192", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 8192 - }, - "file": "index/glove-100-angular/faiss_ivf_flat/nlist8192", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-angular/faiss_ivf_flat/nlist8192" - }, - { - "name": "faiss_ivf_flat.nlist16384", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 16384 - }, - "file": 
"index/glove-100-angular/faiss_ivf_flat/nlist16384", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/glove-100-angular/faiss_ivf_flat/nlist16384" - }, - { - "name": "faiss_ivf_pq.M64-nlist1024", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": true - }, - "file": "index/glove-100-angular/faiss_ivf_pq/M64-nlist1024", - "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-angular/faiss_ivf_pq/M64-nlist1024" - }, - { - "name": "faiss_ivf_pq.M64-nlist1024.noprecomp", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": false - }, - "file": "index/glove-100-angular/faiss_ivf_pq/M64-nlist1024.noprecomp", - "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-angular/faiss_ivf_pq/M64-nlist1024" - }, - { - "name": "faiss_ivf_sq.nlist1024-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 1024, - "quantizer_type": "fp16" - }, - "file": "index/glove-100-angular/faiss_ivf_sq/nlist1024-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-angular/faiss_ivf_sq/nlist1024-fp16" - }, - { - "name": "faiss_ivf_sq.nlist2048-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 2048, - "quantizer_type": "fp16" - }, - "file": "index/glove-100-angular/faiss_ivf_sq/nlist2048-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-angular/faiss_ivf_sq/nlist2048-fp16" - }, - { - "name": "faiss_ivf_sq.nlist4096-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 4096, - "quantizer_type": "fp16" - }, - "file": "index/glove-100-angular/faiss_ivf_sq/nlist4096-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-angular/faiss_ivf_sq/nlist4096-fp16" - }, - { - "name": "faiss_ivf_sq.nlist8192-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 8192, - "quantizer_type": "fp16" - }, - "file": "index/glove-100-angular/faiss_ivf_sq/nlist8192-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-angular/faiss_ivf_sq/nlist8192-fp16" - }, - { - "name": "faiss_ivf_sq.nlist16384-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 16384, - 
"quantizer_type": "fp16" - }, - "file": "index/glove-100-angular/faiss_ivf_sq/nlist16384-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/glove-100-angular/faiss_ivf_sq/nlist16384-fp16" - }, - { - "name": "faiss_ivf_sq.nlist1024-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 1024, - "quantizer_type": "int8" - }, - "file": "index/glove-100-angular/faiss_ivf_sq/nlist1024-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-angular/faiss_ivf_sq/nlist1024-int8" - }, - { - "name": "faiss_ivf_sq.nlist2048-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 2048, - "quantizer_type": "int8" - }, - "file": "index/glove-100-angular/faiss_ivf_sq/nlist2048-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-angular/faiss_ivf_sq/nlist2048-int8" - }, - { - "name": "faiss_ivf_sq.nlist4096-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 4096, - "quantizer_type": "int8" - }, - "file": "index/glove-100-angular/faiss_ivf_sq/nlist4096-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-angular/faiss_ivf_sq/nlist4096-int8" - }, - { - "name": "faiss_ivf_sq.nlist8192-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 8192, - "quantizer_type": "int8" - }, - "file": "index/glove-100-angular/faiss_ivf_sq/nlist8192-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-angular/faiss_ivf_sq/nlist8192-int8" - }, - { - "name": "faiss_ivf_sq.nlist16384-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 16384, - "quantizer_type": "int8" - }, - "file": "index/glove-100-angular/faiss_ivf_sq/nlist16384-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/glove-100-angular/faiss_ivf_sq/nlist16384-int8" - }, - { - "name": "faiss_flat", - "algo": "faiss_gpu_flat", - "build_param": {}, - "file": "index/glove-100-angular/faiss_flat/flat", - "search_params": [ - {} - ], - "search_result_file": "result/glove-100-angular/faiss_flat/flat" - }, - - { - "name": "raft_ivf_pq.dimpq128-cluster1024", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-100-angular/raft_ivf_pq/dimpq128-cluster1024", - "search_params": [ - { - "k": 10, - "numProbes": 10, - 
"internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/glove-100-angular/raft_ivf_pq/dimpq128-cluster1024" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-100-angular/raft_ivf_pq/dimpq128-cluster1024-float-float", - "search_params": [ - { - "k": 10, - "numProbes": 1, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 5, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/glove-100-angular/raft_ivf_pq/dimpq128-cluster1024-float-float" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-100-angular/raft_ivf_pq/dimpq128-cluster1024-float-half", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/glove-100-angular/raft_ivf_pq/dimpq128-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-100-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - 
"internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/glove-100-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-100-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/glove-100-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-100-angular/raft_ivf_pq/dimpq64-cluster1024-float-half", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/glove-100-angular/raft_ivf_pq/dimpq64-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 32, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-100-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/glove-100-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 16, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-100-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8", - "search_params": [ 
- { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/glove-100-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-100-angular/raft_ivf_pq/dimpq128-cluster1024-half-float", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/glove-100-angular/raft_ivf_pq/dimpq128-cluster1024-half-float" - }, - { - "name": "raft_ivf_pq.dimpq512-cluster1024-float-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 512, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-100-angular/raft_ivf_pq/dimpq512-cluster1024-float-float", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/glove-100-angular/raft_ivf_pq/dimpq512-cluster1024-float-float" - }, - { - "name": "raft_ivf_flat.nlist1024", - "algo": "raft_ivf_flat", - "build_param": { - "nlist": 1024, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-100-angular/raft_ivf_flat/nlist1024", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-100-angular/raft_ivf_flat/nlist1024" - }, - { - "name": "raft_ivf_flat.nlist16384", - "algo": "raft_ivf_flat", - "build_param": { - "nlist": 16384, - "ratio": 2, - "niter": 20 - }, - "file": "index/glove-100-angular/raft_ivf_flat/nlist16384", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - 
"nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/glove-100-angular/raft_ivf_flat/nlist16384" - }, - - { - "name" : "raft_cagra.dim32", - "algo" : "raft_cagra", - "build_param": { - "index_dim" : 32 - }, - "file" : "index/glove-100-angular/raft_cagra/dim32", - "search_params" : [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ], - "search_result_file" : "result/glove-100-angular/raft_cagra/dim32" - }, - - { - "name" : "raft_cagra.dim64", - "algo" : "raft_cagra", - "build_param": { - "index_dim" : 64 - }, - "file" : "index/glove-100-angular/raft_cagra/dim64", - "search_params" : [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ], - "search_result_file" : "result/glove-100-angular/raft_cagra/dim64" - } - ] -} diff --git a/bench/ann/conf/glove-100-inner.json b/bench/ann/conf/glove-100-inner.json deleted file mode 100644 index 7c95ceb439..0000000000 --- a/bench/ann/conf/glove-100-inner.json +++ /dev/null @@ -1,713 +0,0 @@ -{ - "dataset": { - "name": "glove-100-inner", - "base_file": "glove-100-inner/base.fbin", - "query_file": "glove-100-inner/query.fbin", - "groundtruth_neighbors_file": "glove-100-inner/groundtruth.neighbors.ibin", - "distance": "inner_product" - }, - - "search_basic_param": { - "batch_size": 1, - "k": 10 - }, - - "index": [ - { - "name": "hnswlib.M4", - "algo": "hnswlib", - "build_param": {"M":4, "efConstruction":500, "numThreads":4}, - "file": "glove-100-inner/hnswlib/M4", - "search_params": [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ] - }, - { - "name": "hnswlib.M8", - "algo": "hnswlib", - "build_param": {"M":8, "efConstruction":500, "numThreads":4}, - "file": "glove-100-inner/hnswlib/M8", - "search_params": [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ] - }, - { - "name": "hnswlib.M12", - "algo": "hnswlib", - "build_param": {"M":12, "efConstruction":500, "numThreads":4}, - "file": "glove-100-inner/hnswlib/M12", - "search_params": [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ] - }, - { - "name": "hnswlib.M16", - "algo": "hnswlib", - "build_param": {"M":16, "efConstruction":500, "numThreads":4}, - "file": "glove-100-inner/hnswlib/M16", - "search_params": [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ] - }, - { - "name": "hnswlib.M24", - "algo": "hnswlib", - "build_param": {"M":24, "efConstruction":500, "numThreads":4}, - "file": "glove-100-inner/hnswlib/M24", - "search_params": [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, 
"numThreads":1} - ] - }, - { - "name": "hnswlib.M36", - "algo": "hnswlib", - "build_param": {"M":36, "efConstruction":500, "numThreads":4}, - "file": "glove-100-inner/hnswlib/M36", - "search_params": [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ] - }, - { - "name": "hnswlib.M48", - "algo": "hnswlib", - "build_param": {"M":48, "efConstruction":500, "numThreads":4}, - "file": "glove-100-inner/hnswlib/M48", - "search_params": [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ] - }, - { - "name": "hnswlib.M64", - "algo": "hnswlib", - "build_param": {"M":64, "efConstruction":500, "numThreads":4}, - "file": "glove-100-inner/hnswlib/M64", - "search_params": [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ] - }, - { - "name": "hnswlib.M96", - "algo": "hnswlib", - "build_param": {"M":96, "efConstruction":500, "numThreads":4}, - "file": "glove-100-inner/hnswlib/M96", - "search_params": [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ] - }, - { - "name": "faiss_ivf_flat.nlist1024", - "algo": "faiss_gpu_ivf_flat", - "build_param": {"nlist":1024}, - "file": "glove-100-inner/faiss_ivf_flat/nlist1024", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_flat.nlist2048", - "algo": "faiss_gpu_ivf_flat", - "build_param": {"nlist":2048}, - "file": "glove-100-inner/faiss_ivf_flat/nlist2048", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_flat.nlist4096", - "algo": "faiss_gpu_ivf_flat", - "build_param": {"nlist":4096}, - "file": "glove-100-inner/faiss_ivf_flat/nlist4096", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_flat.nlist8192", - "algo": "faiss_gpu_ivf_flat", - "build_param": {"nlist":8192}, - "file": "glove-100-inner/faiss_ivf_flat/nlist8192", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_flat.nlist16384", - "algo": "faiss_gpu_ivf_flat", - "build_param": {"nlist":16384}, - "file": "glove-100-inner/faiss_ivf_flat/nlist16384", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000}, - {"nprobe":2000} - ] - }, - - - - { - "name": "faiss_ivf_pq.M2-nlist1024", - "algo": 
"faiss_gpu_ivf_pq", - "build_param": {"nlist":1024, "M":2}, - "file": "glove-100-inner/faiss_ivf_pq/M2-nlist1024", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_pq.M2-nlist2048", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":2048, "M":2}, - "file": "glove-100-inner/faiss_ivf_pq/M2-nlist2048", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_pq.M2-nlist4096", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":4096, "M":2}, - "file": "glove-100-inner/faiss_ivf_pq/M2-nlist4096", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_pq.M2-nlist8192", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":8192, "M":2}, - "file": "glove-100-inner/faiss_ivf_pq/M2-nlist8192", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_pq.M2-nlist16384", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":16384, "M":2}, - "file": "glove-100-inner/faiss_ivf_pq/M2-nlist16384", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000}, - {"nprobe":2000} - ] - }, - { - "name": "faiss_ivf_pq.M4-nlist1024", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":1024, "M":4}, - "file": "glove-100-inner/faiss_ivf_pq/M4-nlist1024", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_pq.M4-nlist2048", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":2048, "M":4}, - "file": "glove-100-inner/faiss_ivf_pq/M4-nlist2048", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_pq.M4-nlist4096", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":4096, "M":4}, - "file": "glove-100-inner/faiss_ivf_pq/M4-nlist4096", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_pq.M4-nlist8192", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":8192, "M":4}, - "file": "glove-100-inner/faiss_ivf_pq/M4-nlist8192", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_pq.M4-nlist16384", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":16384, "M":4}, - "file": "glove-100-inner/faiss_ivf_pq/M4-nlist16384", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000}, - {"nprobe":2000} - ] - }, - { - "name": "faiss_ivf_pq.M20-nlist1024", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":1024, "M":20}, - "file": "glove-100-inner/faiss_ivf_pq/M20-nlist1024", - "search_params": [ - 
{"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_pq.M20-nlist2048", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":2048, "M":20}, - "file": "glove-100-inner/faiss_ivf_pq/M20-nlist2048", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_pq.M20-nlist4096", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":4096, "M":20}, - "file": "glove-100-inner/faiss_ivf_pq/M20-nlist4096", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_pq.M20-nlist8192", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":8192, "M":20}, - "file": "glove-100-inner/faiss_ivf_pq/M20-nlist8192", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_pq.M20-nlist16384", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":16384, "M":20}, - "file": "glove-100-inner/faiss_ivf_pq/M20-nlist16384", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000}, - {"nprobe":2000} - ] - }, - - - { - "name": "faiss_ivf_sq.nlist1024-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist":1024, "quantizer_type":"fp16"}, - "file": "glove-100-inner/faiss_ivf_sq/nlist1024-fp16", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_sq.nlist2048-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist":2048, "quantizer_type":"fp16"}, - "file": "glove-100-inner/faiss_ivf_sq/nlist2048-fp16", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_sq.nlist4096-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist":4096, "quantizer_type":"fp16"}, - "file": "glove-100-inner/faiss_ivf_sq/nlist4096-fp16", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_sq.nlist8192-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist":8192, "quantizer_type":"fp16"}, - "file": "glove-100-inner/faiss_ivf_sq/nlist8192-fp16", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_sq.nlist16384-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist":16384, "quantizer_type":"fp16"}, - "file": "glove-100-inner/faiss_ivf_sq/nlist16384-fp16", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000}, - {"nprobe":2000} - ] - }, - { - "name": "faiss_ivf_sq.nlist1024-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist":1024, "quantizer_type":"int8"}, - "file": "glove-100-inner/faiss_ivf_sq/nlist1024-int8", - "search_params": [ - 
{"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_sq.nlist2048-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist":2048, "quantizer_type":"int8"}, - "file": "glove-100-inner/faiss_ivf_sq/nlist2048-int8", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_sq.nlist4096-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist":4096, "quantizer_type":"int8"}, - "file": "glove-100-inner/faiss_ivf_sq/nlist4096-int8", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_sq.nlist8192-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist":8192, "quantizer_type":"int8"}, - "file": "glove-100-inner/faiss_ivf_sq/nlist8192-int8", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ] - }, - { - "name": "faiss_ivf_sq.nlist16384-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist":16384, "quantizer_type":"int8"}, - "file": "glove-100-inner/faiss_ivf_sq/nlist16384-int8", - "search_params": [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000}, - {"nprobe":2000} - ] - }, - { - "name": "faiss_flat", - "algo": "faiss_gpu_flat", - "build_param": {}, - "file": "glove-100-inner/faiss_flat/flat", - "search_params": [{}] - }, - { - "name": "ggnn.kbuild96-segment64-refine2-k10", - "algo": "ggnn", - "build_param": { - "k_build": 96, - "segment_size": 64, - "refine_iterations": 2, - "dataset_size": 1183514, - "k": 10 - }, - "file": "glove-100-inner/ggnn/kbuild96-segment64-refine2-k10", - "search_params": [ - {"tau":0.001, "block_dim":64, "sorted_size":32}, - {"tau":0.005, "block_dim":64, "sorted_size":32}, - {"tau":0.01, "block_dim":64, "sorted_size":32}, - {"tau":0.02, "block_dim":64, "sorted_size":32}, - {"tau":0.03, "block_dim":64, "sorted_size":32}, - {"tau":0.04, "block_dim":64, "sorted_size":32}, - {"tau":0.05, "block_dim":64, "sorted_size":32}, - {"tau":0.06, "block_dim":64, "sorted_size":32}, - {"tau":0.09, "block_dim":64, "sorted_size":32}, - {"tau":0.12, "block_dim":64, "sorted_size":32}, - {"tau":0.18, "block_dim":64, "sorted_size":32}, - {"tau":0.21, "block_dim":64, "sorted_size":32}, - {"tau":0.24, "block_dim":64, "sorted_size":32}, - {"tau":0.27, "block_dim":64, "sorted_size":32}, - {"tau":0.3, "block_dim":64, "sorted_size":32}, - {"tau":0.4, "block_dim":64, "sorted_size":32}, - {"tau":0.01, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, - {"tau":0.02, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, - {"tau":0.03, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, - {"tau":0.04, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, - {"tau":0.05, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, - {"tau":0.06, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, - {"tau":0.09, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, - {"tau":0.12, "block_dim":128, "max_iterations":2000, 
"cache_size":1024, "sorted_size":32}, - {"tau":0.18, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, - {"tau":0.21, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, - {"tau":0.24, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, - {"tau":0.27, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, - {"tau":0.3, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, - {"tau":0.4, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, - {"tau":0.5, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32} - ] - } - ] -} diff --git a/bench/ann/conf/glove-50-angular.json b/bench/ann/conf/glove-50-angular.json deleted file mode 100644 index 3e78c11814..0000000000 --- a/bench/ann/conf/glove-50-angular.json +++ /dev/null @@ -1,1351 +0,0 @@ -{ - "dataset": { - "name": "glove-50-angular", - "base_file": "data/glove-50-angular/base.fbin", - "query_file": "data/glove-50-angular/query.fbin", - "distance": "euclidean" - }, - "search_basic_param": { - "batch_size": 5000, - "k": 10, - "run_count": 3 - }, - "index": [ - { - "name" : "hnswlib.M12", - "algo" : "hnswlib", - "build_param": {"M":12, "efConstruction":500, "numThreads":32}, - "file" : "index/glove-50-angular/hnswlib/M12", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/glove-50-angular/hnswlib/M12" - }, - { - "name" : "hnswlib.M16", - "algo" : "hnswlib", - "build_param": {"M":16, "efConstruction":500, "numThreads":32}, - "file" : "index/glove-50-angular/hnswlib/M16", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/glove-50-angular/hnswlib/M16" - }, - { - "name" : "hnswlib.M24", - "algo" : "hnswlib", - "build_param": {"M":24, "efConstruction":500, "numThreads":32}, - "file" : "index/glove-50-angular/hnswlib/M24", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/glove-50-angular/hnswlib/M24" - }, - { - "name" : "hnswlib.M36", - "algo" : "hnswlib", - "build_param": {"M":36, "efConstruction":500, "numThreads":32}, - "file" : "index/glove-50-angular/hnswlib/M36", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/glove-50-angular/hnswlib/M36" - }, - - - - - { - "name": "raft_bfknn", - "algo": "raft_bfknn", - "build_param": {}, - "file": "index/glove-50-angular/raft_bfknn/bfknn", - 
"search_params": [ - { - "probe": 1 - } - ], - "search_result_file": "result/glove-50-angular/raft_bfknn/bfknn" - }, - { - "name": "faiss_ivf_flat.nlist1024", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 1024 - }, - "file": "index/glove-50-angular/faiss_ivf_flat/nlist1024", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-angular/faiss_ivf_flat/nlist1024" - }, - { - "name": "faiss_ivf_flat.nlist2048", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 2048 - }, - "file": "index/glove-50-angular/faiss_ivf_flat/nlist2048", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-angular/faiss_ivf_flat/nlist2048" - }, - { - "name": "faiss_ivf_flat.nlist4096", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 4096 - }, - "file": "index/glove-50-angular/faiss_ivf_flat/nlist4096", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-angular/faiss_ivf_flat/nlist4096" - }, - { - "name": "faiss_ivf_flat.nlist8192", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 8192 - }, - "file": "index/glove-50-angular/faiss_ivf_flat/nlist8192", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-angular/faiss_ivf_flat/nlist8192" - }, - { - "name": "faiss_ivf_flat.nlist16384", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 16384 - }, - "file": "index/glove-50-angular/faiss_ivf_flat/nlist16384", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/glove-50-angular/faiss_ivf_flat/nlist16384" - }, - { - "name": "faiss_ivf_pq.M64-nlist1024", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": true - }, - "file": "index/glove-50-angular/faiss_ivf_pq/M64-nlist1024", - "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-angular/faiss_ivf_pq/M64-nlist1024" - }, - { - "name": "faiss_ivf_pq.M64-nlist1024.noprecomp", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": false - }, - "file": "index/glove-50-angular/faiss_ivf_pq/M64-nlist1024.noprecomp", - "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-angular/faiss_ivf_pq/M64-nlist1024" - }, - { - "name": 
"faiss_ivf_sq.nlist1024-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 1024, - "quantizer_type": "fp16" - }, - "file": "index/glove-50-angular/faiss_ivf_sq/nlist1024-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-angular/faiss_ivf_sq/nlist1024-fp16" - }, - { - "name": "faiss_ivf_sq.nlist2048-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 2048, - "quantizer_type": "fp16" - }, - "file": "index/glove-50-angular/faiss_ivf_sq/nlist2048-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-angular/faiss_ivf_sq/nlist2048-fp16" - }, - { - "name": "faiss_ivf_sq.nlist4096-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 4096, - "quantizer_type": "fp16" - }, - "file": "index/glove-50-angular/faiss_ivf_sq/nlist4096-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-angular/faiss_ivf_sq/nlist4096-fp16" - }, - { - "name": "faiss_ivf_sq.nlist8192-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 8192, - "quantizer_type": "fp16" - }, - "file": "index/glove-50-angular/faiss_ivf_sq/nlist8192-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-angular/faiss_ivf_sq/nlist8192-fp16" - }, - { - "name": "faiss_ivf_sq.nlist16384-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 16384, - "quantizer_type": "fp16" - }, - "file": "index/glove-50-angular/faiss_ivf_sq/nlist16384-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/glove-50-angular/faiss_ivf_sq/nlist16384-fp16" - }, - { - "name": "faiss_ivf_sq.nlist1024-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 1024, - "quantizer_type": "int8" - }, - "file": "index/glove-50-angular/faiss_ivf_sq/nlist1024-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-angular/faiss_ivf_sq/nlist1024-int8" - }, - { - "name": "faiss_ivf_sq.nlist2048-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 2048, - "quantizer_type": "int8" - }, - "file": "index/glove-50-angular/faiss_ivf_sq/nlist2048-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": 
"result/glove-50-angular/faiss_ivf_sq/nlist2048-int8" - }, - { - "name": "faiss_ivf_sq.nlist4096-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 4096, - "quantizer_type": "int8" - }, - "file": "index/glove-50-angular/faiss_ivf_sq/nlist4096-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-angular/faiss_ivf_sq/nlist4096-int8" - }, - { - "name": "faiss_ivf_sq.nlist8192-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 8192, - "quantizer_type": "int8" - }, - "file": "index/glove-50-angular/faiss_ivf_sq/nlist8192-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-angular/faiss_ivf_sq/nlist8192-int8" - }, - { - "name": "faiss_ivf_sq.nlist16384-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 16384, - "quantizer_type": "int8" - }, - "file": "index/glove-50-angular/faiss_ivf_sq/nlist16384-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/glove-50-angular/faiss_ivf_sq/nlist16384-int8" - }, - { - "name": "faiss_flat", - "algo": "faiss_gpu_flat", - "build_param": {}, - "file": "index/glove-50-angular/faiss_flat/flat", - "search_params": [ - {} - ], - "search_result_file": "result/glove-50-angular/faiss_flat/flat" - }, - - { - "name": "raft_ivf_pq.dimpq128-cluster1024", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-50-angular/raft_ivf_pq/dimpq128-cluster1024", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/glove-50-angular/raft_ivf_pq/dimpq128-cluster1024" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-50-angular/raft_ivf_pq/dimpq128-cluster1024-float-float", - "search_params": [ - { - "k": 10, - "numProbes": 1, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 5, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - 
"smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/glove-50-angular/raft_ivf_pq/dimpq128-cluster1024-float-float" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-50-angular/raft_ivf_pq/dimpq128-cluster1024-float-half", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/glove-50-angular/raft_ivf_pq/dimpq128-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-50-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/glove-50-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-50-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/glove-50-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", - "algo": "raft_ivf_pq", - 
"build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-50-angular/raft_ivf_pq/dimpq64-cluster1024-float-half", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/glove-50-angular/raft_ivf_pq/dimpq64-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 32, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-50-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/glove-50-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 16, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-50-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/glove-50-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-50-angular/raft_ivf_pq/dimpq128-cluster1024-half-float", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - 
"internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/glove-50-angular/raft_ivf_pq/dimpq128-cluster1024-half-float" - }, - { - "name": "raft_ivf_pq.dimpq512-cluster1024-float-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 512, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-50-angular/raft_ivf_pq/dimpq512-cluster1024-float-float", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/glove-50-angular/raft_ivf_pq/dimpq512-cluster1024-float-float" - }, - { - "name": "raft_ivf_flat.nlist1024", - "algo": "raft_ivf_flat", - "build_param": { - "nlist": 1024, - "ratio": 1, - "niter": 25 - }, - "file": "index/glove-50-angular/raft_ivf_flat/nlist1024", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/glove-50-angular/raft_ivf_flat/nlist1024" - }, - { - "name": "raft_ivf_flat.nlist16384", - "algo": "raft_ivf_flat", - "build_param": { - "nlist": 16384, - "ratio": 2, - "niter": 20 - }, - "file": "index/glove-50-angular/raft_ivf_flat/nlist16384", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/glove-50-angular/raft_ivf_flat/nlist16384" - }, - - { - "name" : "raft_cagra.dim32", - "algo" : "raft_cagra", - "build_param": { - "index_dim" : 32 - }, - "file" : "index/glove-50-angular/raft_cagra/dim32", - "search_params" : [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ], - "search_result_file" : "result/glove-50-angular/raft_cagra/dim32" - }, - - { - "name" : "raft_cagra.dim64", - "algo" : "raft_cagra", - "build_param": { - "index_dim" : 64 - }, - "file" : "index/glove-50-angular/raft_cagra/dim64", - "search_params" : [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ], - "search_result_file" : "result/glove-50-angular/raft_cagra/dim64" - } - ] -} diff --git a/bench/ann/conf/lastfm-65-angular.json b/bench/ann/conf/lastfm-65-angular.json deleted file mode 100644 index 62f8878bd6..0000000000 --- a/bench/ann/conf/lastfm-65-angular.json +++ /dev/null @@ -1,1351 +0,0 @@ -{ - "dataset": { - "name": "lastfm-65-angular", - "base_file": "data/lastfm-65-angular/base.fbin", - "query_file": "data/lastfm-65-angular/query.fbin", - "distance": "euclidean" - }, - "search_basic_param": { - "batch_size": 5000, - "k": 10, - "run_count": 3 - }, - "index": [ - { - "name" : "hnswlib.M12", - "algo" : "hnswlib", - "build_param": {"M":12, "efConstruction":500, "numThreads":32}, - "file" : 
"index/lastfm-65-angular/hnswlib/M12", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/lastfm-65-angular/hnswlib/M12" - }, - { - "name" : "hnswlib.M16", - "algo" : "hnswlib", - "build_param": {"M":16, "efConstruction":500, "numThreads":32}, - "file" : "index/lastfm-65-angular/hnswlib/M16", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/lastfm-65-angular/hnswlib/M16" - }, - { - "name" : "hnswlib.M24", - "algo" : "hnswlib", - "build_param": {"M":24, "efConstruction":500, "numThreads":32}, - "file" : "index/lastfm-65-angular/hnswlib/M24", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/lastfm-65-angular/hnswlib/M24" - }, - { - "name" : "hnswlib.M36", - "algo" : "hnswlib", - "build_param": {"M":36, "efConstruction":500, "numThreads":32}, - "file" : "index/lastfm-65-angular/hnswlib/M36", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/lastfm-65-angular/hnswlib/M36" - }, - - - - - { - "name": "raft_bfknn", - "algo": "raft_bfknn", - "build_param": {}, - "file": "index/lastfm-65-angular/raft_bfknn/bfknn", - "search_params": [ - { - "probe": 1 - } - ], - "search_result_file": "result/lastfm-65-angular/raft_bfknn/bfknn" - }, - { - "name": "faiss_ivf_flat.nlist1024", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 1024 - }, - "file": "index/lastfm-65-angular/faiss_ivf_flat/nlist1024", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/lastfm-65-angular/faiss_ivf_flat/nlist1024" - }, - { - "name": "faiss_ivf_flat.nlist2048", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 2048 - }, - "file": "index/lastfm-65-angular/faiss_ivf_flat/nlist2048", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/lastfm-65-angular/faiss_ivf_flat/nlist2048" - }, - { - "name": "faiss_ivf_flat.nlist4096", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 4096 - }, - "file": "index/lastfm-65-angular/faiss_ivf_flat/nlist4096", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, 
- { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/lastfm-65-angular/faiss_ivf_flat/nlist4096" - }, - { - "name": "faiss_ivf_flat.nlist8192", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 8192 - }, - "file": "index/lastfm-65-angular/faiss_ivf_flat/nlist8192", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/lastfm-65-angular/faiss_ivf_flat/nlist8192" - }, - { - "name": "faiss_ivf_flat.nlist16384", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 16384 - }, - "file": "index/lastfm-65-angular/faiss_ivf_flat/nlist16384", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/lastfm-65-angular/faiss_ivf_flat/nlist16384" - }, - { - "name": "faiss_ivf_pq.M64-nlist1024", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": true - }, - "file": "index/lastfm-65-angular/faiss_ivf_pq/M64-nlist1024", - "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/lastfm-65-angular/faiss_ivf_pq/M64-nlist1024" - }, - { - "name": "faiss_ivf_pq.M64-nlist1024.noprecomp", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": false - }, - "file": "index/lastfm-65-angular/faiss_ivf_pq/M64-nlist1024.noprecomp", - "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/lastfm-65-angular/faiss_ivf_pq/M64-nlist1024" - }, - { - "name": "faiss_ivf_sq.nlist1024-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 1024, - "quantizer_type": "fp16" - }, - "file": "index/lastfm-65-angular/faiss_ivf_sq/nlist1024-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/lastfm-65-angular/faiss_ivf_sq/nlist1024-fp16" - }, - { - "name": "faiss_ivf_sq.nlist2048-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 2048, - "quantizer_type": "fp16" - }, - "file": "index/lastfm-65-angular/faiss_ivf_sq/nlist2048-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/lastfm-65-angular/faiss_ivf_sq/nlist2048-fp16" - }, - { - "name": "faiss_ivf_sq.nlist4096-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 4096, - "quantizer_type": "fp16" - }, - "file": "index/lastfm-65-angular/faiss_ivf_sq/nlist4096-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { 
- "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/lastfm-65-angular/faiss_ivf_sq/nlist4096-fp16" - }, - { - "name": "faiss_ivf_sq.nlist8192-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 8192, - "quantizer_type": "fp16" - }, - "file": "index/lastfm-65-angular/faiss_ivf_sq/nlist8192-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/lastfm-65-angular/faiss_ivf_sq/nlist8192-fp16" - }, - { - "name": "faiss_ivf_sq.nlist16384-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 16384, - "quantizer_type": "fp16" - }, - "file": "index/lastfm-65-angular/faiss_ivf_sq/nlist16384-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/lastfm-65-angular/faiss_ivf_sq/nlist16384-fp16" - }, - { - "name": "faiss_ivf_sq.nlist1024-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 1024, - "quantizer_type": "int8" - }, - "file": "index/lastfm-65-angular/faiss_ivf_sq/nlist1024-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/lastfm-65-angular/faiss_ivf_sq/nlist1024-int8" - }, - { - "name": "faiss_ivf_sq.nlist2048-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 2048, - "quantizer_type": "int8" - }, - "file": "index/lastfm-65-angular/faiss_ivf_sq/nlist2048-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/lastfm-65-angular/faiss_ivf_sq/nlist2048-int8" - }, - { - "name": "faiss_ivf_sq.nlist4096-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 4096, - "quantizer_type": "int8" - }, - "file": "index/lastfm-65-angular/faiss_ivf_sq/nlist4096-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/lastfm-65-angular/faiss_ivf_sq/nlist4096-int8" - }, - { - "name": "faiss_ivf_sq.nlist8192-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 8192, - "quantizer_type": "int8" - }, - "file": "index/lastfm-65-angular/faiss_ivf_sq/nlist8192-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/lastfm-65-angular/faiss_ivf_sq/nlist8192-int8" - }, - { - "name": "faiss_ivf_sq.nlist16384-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 16384, - "quantizer_type": "int8" - }, - "file": "index/lastfm-65-angular/faiss_ivf_sq/nlist16384-int8", - "search_params": [ - { - 
"nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/lastfm-65-angular/faiss_ivf_sq/nlist16384-int8" - }, - { - "name": "faiss_flat", - "algo": "faiss_gpu_flat", - "build_param": {}, - "file": "index/lastfm-65-angular/faiss_flat/flat", - "search_params": [ - {} - ], - "search_result_file": "result/lastfm-65-angular/faiss_flat/flat" - }, - - { - "name": "raft_ivf_pq.dimpq128-cluster1024", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/lastfm-65-angular/raft_ivf_pq/dimpq128-cluster1024", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/lastfm-65-angular/raft_ivf_pq/dimpq128-cluster1024" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/lastfm-65-angular/raft_ivf_pq/dimpq128-cluster1024-float-float", - "search_params": [ - { - "k": 10, - "numProbes": 1, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 5, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/lastfm-65-angular/raft_ivf_pq/dimpq128-cluster1024-float-float" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/lastfm-65-angular/raft_ivf_pq/dimpq128-cluster1024-float-half", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - 
{ - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/lastfm-65-angular/raft_ivf_pq/dimpq128-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/lastfm-65-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/lastfm-65-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/lastfm-65-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/lastfm-65-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/lastfm-65-angular/raft_ivf_pq/dimpq64-cluster1024-float-half", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/lastfm-65-angular/raft_ivf_pq/dimpq64-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 32, - "ratio": 1, - "niter": 25 - }, - "file": "index/lastfm-65-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - 
"internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/lastfm-65-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 16, - "ratio": 1, - "niter": 25 - }, - "file": "index/lastfm-65-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/lastfm-65-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/lastfm-65-angular/raft_ivf_pq/dimpq128-cluster1024-half-float", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/lastfm-65-angular/raft_ivf_pq/dimpq128-cluster1024-half-float" - }, - { - "name": "raft_ivf_pq.dimpq512-cluster1024-float-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 512, - "ratio": 1, - "niter": 25 - }, - "file": "index/lastfm-65-angular/raft_ivf_pq/dimpq512-cluster1024-float-float", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/lastfm-65-angular/raft_ivf_pq/dimpq512-cluster1024-float-float" - }, - { - "name": "raft_ivf_flat.nlist1024", - 
"algo": "raft_ivf_flat", - "build_param": { - "nlist": 1024, - "ratio": 1, - "niter": 25 - }, - "file": "index/lastfm-65-angular/raft_ivf_flat/nlist1024", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/lastfm-65-angular/raft_ivf_flat/nlist1024" - }, - { - "name": "raft_ivf_flat.nlist16384", - "algo": "raft_ivf_flat", - "build_param": { - "nlist": 16384, - "ratio": 2, - "niter": 20 - }, - "file": "index/lastfm-65-angular/raft_ivf_flat/nlist16384", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/lastfm-65-angular/raft_ivf_flat/nlist16384" - }, - - { - "name" : "raft_cagra.dim32", - "algo" : "raft_cagra", - "build_param": { - "index_dim" : 32 - }, - "file" : "index/lastfm-65-angular/raft_cagra/dim32", - "search_params" : [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ], - "search_result_file" : "result/lastfm-65-angular/raft_cagra/dim32" - }, - - { - "name" : "raft_cagra.dim64", - "algo" : "raft_cagra", - "build_param": { - "index_dim" : 64 - }, - "file" : "index/lastfm-65-angular/raft_cagra/dim64", - "search_params" : [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ], - "search_result_file" : "result/lastfm-65-angular/raft_cagra/dim64" - } - ] -} diff --git a/bench/ann/conf/mnist-784-euclidean.json b/bench/ann/conf/mnist-784-euclidean.json deleted file mode 100644 index 30e39a841c..0000000000 --- a/bench/ann/conf/mnist-784-euclidean.json +++ /dev/null @@ -1,1351 +0,0 @@ -{ - "dataset": { - "name": "mnist-784-euclidean", - "base_file": "data/mnist-784-euclidean/base.fbin", - "query_file": "data/mnist-784-euclidean/query.fbin", - "distance": "euclidean" - }, - "search_basic_param": { - "batch_size": 5000, - "k": 10, - "run_count": 3 - }, - "index": [ - { - "name" : "hnswlib.M12", - "algo" : "hnswlib", - "build_param": {"M":12, "efConstruction":500, "numThreads":32}, - "file" : "index/mnist-784-euclidean/hnswlib/M12", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/mnist-784-euclidean/hnswlib/M12" - }, - { - "name" : "hnswlib.M16", - "algo" : "hnswlib", - "build_param": {"M":16, "efConstruction":500, "numThreads":32}, - "file" : "index/mnist-784-euclidean/hnswlib/M16", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/mnist-784-euclidean/hnswlib/M16" - }, - { - "name" : "hnswlib.M24", - "algo" : "hnswlib", - "build_param": {"M":24, "efConstruction":500, "numThreads":32}, - "file" : "index/mnist-784-euclidean/hnswlib/M24", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, 
"numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/mnist-784-euclidean/hnswlib/M24" - }, - { - "name" : "hnswlib.M36", - "algo" : "hnswlib", - "build_param": {"M":36, "efConstruction":500, "numThreads":32}, - "file" : "index/mnist-784-euclidean/hnswlib/M36", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/mnist-784-euclidean/hnswlib/M36" - }, - - - - - { - "name": "raft_bfknn", - "algo": "raft_bfknn", - "build_param": {}, - "file": "index/mnist-784-euclidean/raft_bfknn/bfknn", - "search_params": [ - { - "probe": 1 - } - ], - "search_result_file": "result/mnist-784-euclidean/raft_bfknn/bfknn" - }, - { - "name": "faiss_ivf_flat.nlist1024", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 1024 - }, - "file": "index/mnist-784-euclidean/faiss_ivf_flat/nlist1024", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/mnist-784-euclidean/faiss_ivf_flat/nlist1024" - }, - { - "name": "faiss_ivf_flat.nlist2048", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 2048 - }, - "file": "index/mnist-784-euclidean/faiss_ivf_flat/nlist2048", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/mnist-784-euclidean/faiss_ivf_flat/nlist2048" - }, - { - "name": "faiss_ivf_flat.nlist4096", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 4096 - }, - "file": "index/mnist-784-euclidean/faiss_ivf_flat/nlist4096", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/mnist-784-euclidean/faiss_ivf_flat/nlist4096" - }, - { - "name": "faiss_ivf_flat.nlist8192", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 8192 - }, - "file": "index/mnist-784-euclidean/faiss_ivf_flat/nlist8192", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/mnist-784-euclidean/faiss_ivf_flat/nlist8192" - }, - { - "name": "faiss_ivf_flat.nlist16384", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 16384 - }, - "file": "index/mnist-784-euclidean/faiss_ivf_flat/nlist16384", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/mnist-784-euclidean/faiss_ivf_flat/nlist16384" - }, - { - "name": 
"faiss_ivf_pq.M64-nlist1024", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": true - }, - "file": "index/mnist-784-euclidean/faiss_ivf_pq/M64-nlist1024", - "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/mnist-784-euclidean/faiss_ivf_pq/M64-nlist1024" - }, - { - "name": "faiss_ivf_pq.M64-nlist1024.noprecomp", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": false - }, - "file": "index/mnist-784-euclidean/faiss_ivf_pq/M64-nlist1024.noprecomp", - "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/mnist-784-euclidean/faiss_ivf_pq/M64-nlist1024" - }, - { - "name": "faiss_ivf_sq.nlist1024-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 1024, - "quantizer_type": "fp16" - }, - "file": "index/mnist-784-euclidean/faiss_ivf_sq/nlist1024-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/mnist-784-euclidean/faiss_ivf_sq/nlist1024-fp16" - }, - { - "name": "faiss_ivf_sq.nlist2048-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 2048, - "quantizer_type": "fp16" - }, - "file": "index/mnist-784-euclidean/faiss_ivf_sq/nlist2048-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/mnist-784-euclidean/faiss_ivf_sq/nlist2048-fp16" - }, - { - "name": "faiss_ivf_sq.nlist4096-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 4096, - "quantizer_type": "fp16" - }, - "file": "index/mnist-784-euclidean/faiss_ivf_sq/nlist4096-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/mnist-784-euclidean/faiss_ivf_sq/nlist4096-fp16" - }, - { - "name": "faiss_ivf_sq.nlist8192-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 8192, - "quantizer_type": "fp16" - }, - "file": "index/mnist-784-euclidean/faiss_ivf_sq/nlist8192-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/mnist-784-euclidean/faiss_ivf_sq/nlist8192-fp16" - }, - { - "name": "faiss_ivf_sq.nlist16384-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 16384, - "quantizer_type": "fp16" - }, - "file": "index/mnist-784-euclidean/faiss_ivf_sq/nlist16384-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": 
"result/mnist-784-euclidean/faiss_ivf_sq/nlist16384-fp16" - }, - { - "name": "faiss_ivf_sq.nlist1024-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 1024, - "quantizer_type": "int8" - }, - "file": "index/mnist-784-euclidean/faiss_ivf_sq/nlist1024-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/mnist-784-euclidean/faiss_ivf_sq/nlist1024-int8" - }, - { - "name": "faiss_ivf_sq.nlist2048-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 2048, - "quantizer_type": "int8" - }, - "file": "index/mnist-784-euclidean/faiss_ivf_sq/nlist2048-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/mnist-784-euclidean/faiss_ivf_sq/nlist2048-int8" - }, - { - "name": "faiss_ivf_sq.nlist4096-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 4096, - "quantizer_type": "int8" - }, - "file": "index/mnist-784-euclidean/faiss_ivf_sq/nlist4096-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/mnist-784-euclidean/faiss_ivf_sq/nlist4096-int8" - }, - { - "name": "faiss_ivf_sq.nlist8192-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 8192, - "quantizer_type": "int8" - }, - "file": "index/mnist-784-euclidean/faiss_ivf_sq/nlist8192-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/mnist-784-euclidean/faiss_ivf_sq/nlist8192-int8" - }, - { - "name": "faiss_ivf_sq.nlist16384-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 16384, - "quantizer_type": "int8" - }, - "file": "index/mnist-784-euclidean/faiss_ivf_sq/nlist16384-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/mnist-784-euclidean/faiss_ivf_sq/nlist16384-int8" - }, - { - "name": "faiss_flat", - "algo": "faiss_gpu_flat", - "build_param": {}, - "file": "index/mnist-784-euclidean/faiss_flat/flat", - "search_params": [ - {} - ], - "search_result_file": "result/mnist-784-euclidean/faiss_flat/flat" - }, - - { - "name": "raft_ivf_pq.dimpq128-cluster1024", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "half", - "smemLutDtype": 
"half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-float", - "search_params": [ - { - "k": 10, - "numProbes": 1, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 5, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-float" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-half", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": 
"result/mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/mnist-784-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/mnist-784-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/mnist-784-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-half", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/mnist-784-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 32, - "ratio": 1, - "niter": 25 - }, - "file": "index/mnist-784-euclidean/raft_ivf_pq/dimpq32-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/mnist-784-euclidean/raft_ivf_pq/dimpq32-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 16, - "ratio": 1, - "niter": 25 - }, - "file": "index/mnist-784-euclidean/raft_ivf_pq/dimpq16-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - 
"smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/mnist-784-euclidean/raft_ivf_pq/dimpq16-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-half-float", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/mnist-784-euclidean/raft_ivf_pq/dimpq128-cluster1024-half-float" - }, - { - "name": "raft_ivf_pq.dimpq512-cluster1024-float-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 512, - "ratio": 1, - "niter": 25 - }, - "file": "index/mnist-784-euclidean/raft_ivf_pq/dimpq512-cluster1024-float-float", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/mnist-784-euclidean/raft_ivf_pq/dimpq512-cluster1024-float-float" - }, - { - "name": "raft_ivf_flat.nlist1024", - "algo": "raft_ivf_flat", - "build_param": { - "nlist": 1024, - "ratio": 1, - "niter": 25 - }, - "file": "index/mnist-784-euclidean/raft_ivf_flat/nlist1024", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/mnist-784-euclidean/raft_ivf_flat/nlist1024" - }, - { - "name": "raft_ivf_flat.nlist16384", - "algo": "raft_ivf_flat", - "build_param": { - "nlist": 16384, - "ratio": 2, - "niter": 20 - }, - "file": "index/mnist-784-euclidean/raft_ivf_flat/nlist16384", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/mnist-784-euclidean/raft_ivf_flat/nlist16384" - }, - - { - "name" : "raft_cagra.dim32", - "algo" : "raft_cagra", - "build_param": { - "index_dim" : 32 - }, 
- "file" : "index/mnist-784-euclidean/raft_cagra/dim32", - "search_params" : [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ], - "search_result_file" : "result/mnist-784-euclidean/raft_cagra/dim32" - }, - - { - "name" : "raft_cagra.dim64", - "algo" : "raft_cagra", - "build_param": { - "index_dim" : 64 - }, - "file" : "index/mnist-784-euclidean/raft_cagra/dim64", - "search_params" : [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ], - "search_result_file" : "result/mnist-784-euclidean/raft_cagra/dim64" - } - ] -} diff --git a/bench/ann/conf/nytimes-256-angular.json b/bench/ann/conf/nytimes-256-angular.json deleted file mode 100644 index 5d4e19d46b..0000000000 --- a/bench/ann/conf/nytimes-256-angular.json +++ /dev/null @@ -1,1351 +0,0 @@ -{ - "dataset": { - "name": "nytimes-256-angular", - "base_file": "data/nytimes-256-angular/base.fbin", - "query_file": "data/nytimes-256-angular/query.fbin", - "distance": "euclidean" - }, - "search_basic_param": { - "batch_size": 5000, - "k": 10, - "run_count": 3 - }, - "index": [ - { - "name" : "hnswlib.M12", - "algo" : "hnswlib", - "build_param": {"M":12, "efConstruction":500, "numThreads":32}, - "file" : "index/nytimes-256-angular/hnswlib/M12", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/nytimes-256-angular/hnswlib/M12" - }, - { - "name" : "hnswlib.M16", - "algo" : "hnswlib", - "build_param": {"M":16, "efConstruction":500, "numThreads":32}, - "file" : "index/nytimes-256-angular/hnswlib/M16", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/nytimes-256-angular/hnswlib/M16" - }, - { - "name" : "hnswlib.M24", - "algo" : "hnswlib", - "build_param": {"M":24, "efConstruction":500, "numThreads":32}, - "file" : "index/nytimes-256-angular/hnswlib/M24", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/nytimes-256-angular/hnswlib/M24" - }, - { - "name" : "hnswlib.M36", - "algo" : "hnswlib", - "build_param": {"M":36, "efConstruction":500, "numThreads":32}, - "file" : "index/nytimes-256-angular/hnswlib/M36", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/nytimes-256-angular/hnswlib/M36" - }, - - - - - { - "name": "raft_bfknn", - "algo": "raft_bfknn", - "build_param": {}, - "file": "index/nytimes-256-angular/raft_bfknn/bfknn", - "search_params": [ - { - "probe": 1 - } - ], - "search_result_file": "result/nytimes-256-angular/raft_bfknn/bfknn" - }, - 
{ - "name": "faiss_ivf_flat.nlist1024", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 1024 - }, - "file": "index/nytimes-256-angular/faiss_ivf_flat/nlist1024", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-angular/faiss_ivf_flat/nlist1024" - }, - { - "name": "faiss_ivf_flat.nlist2048", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 2048 - }, - "file": "index/nytimes-256-angular/faiss_ivf_flat/nlist2048", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-angular/faiss_ivf_flat/nlist2048" - }, - { - "name": "faiss_ivf_flat.nlist4096", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 4096 - }, - "file": "index/nytimes-256-angular/faiss_ivf_flat/nlist4096", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-angular/faiss_ivf_flat/nlist4096" - }, - { - "name": "faiss_ivf_flat.nlist8192", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 8192 - }, - "file": "index/nytimes-256-angular/faiss_ivf_flat/nlist8192", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-angular/faiss_ivf_flat/nlist8192" - }, - { - "name": "faiss_ivf_flat.nlist16384", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 16384 - }, - "file": "index/nytimes-256-angular/faiss_ivf_flat/nlist16384", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/nytimes-256-angular/faiss_ivf_flat/nlist16384" - }, - { - "name": "faiss_ivf_pq.M64-nlist1024", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": true - }, - "file": "index/nytimes-256-angular/faiss_ivf_pq/M64-nlist1024", - "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-angular/faiss_ivf_pq/M64-nlist1024" - }, - { - "name": "faiss_ivf_pq.M64-nlist1024.noprecomp", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": false - }, - "file": "index/nytimes-256-angular/faiss_ivf_pq/M64-nlist1024.noprecomp", - "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-angular/faiss_ivf_pq/M64-nlist1024" - }, - { - "name": "faiss_ivf_sq.nlist1024-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 
1024, - "quantizer_type": "fp16" - }, - "file": "index/nytimes-256-angular/faiss_ivf_sq/nlist1024-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-angular/faiss_ivf_sq/nlist1024-fp16" - }, - { - "name": "faiss_ivf_sq.nlist2048-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 2048, - "quantizer_type": "fp16" - }, - "file": "index/nytimes-256-angular/faiss_ivf_sq/nlist2048-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-angular/faiss_ivf_sq/nlist2048-fp16" - }, - { - "name": "faiss_ivf_sq.nlist4096-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 4096, - "quantizer_type": "fp16" - }, - "file": "index/nytimes-256-angular/faiss_ivf_sq/nlist4096-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-angular/faiss_ivf_sq/nlist4096-fp16" - }, - { - "name": "faiss_ivf_sq.nlist8192-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 8192, - "quantizer_type": "fp16" - }, - "file": "index/nytimes-256-angular/faiss_ivf_sq/nlist8192-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-angular/faiss_ivf_sq/nlist8192-fp16" - }, - { - "name": "faiss_ivf_sq.nlist16384-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 16384, - "quantizer_type": "fp16" - }, - "file": "index/nytimes-256-angular/faiss_ivf_sq/nlist16384-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/nytimes-256-angular/faiss_ivf_sq/nlist16384-fp16" - }, - { - "name": "faiss_ivf_sq.nlist1024-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 1024, - "quantizer_type": "int8" - }, - "file": "index/nytimes-256-angular/faiss_ivf_sq/nlist1024-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-angular/faiss_ivf_sq/nlist1024-int8" - }, - { - "name": "faiss_ivf_sq.nlist2048-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 2048, - "quantizer_type": "int8" - }, - "file": "index/nytimes-256-angular/faiss_ivf_sq/nlist2048-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-angular/faiss_ivf_sq/nlist2048-int8" - }, - { - "name": 
"faiss_ivf_sq.nlist4096-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 4096, - "quantizer_type": "int8" - }, - "file": "index/nytimes-256-angular/faiss_ivf_sq/nlist4096-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-angular/faiss_ivf_sq/nlist4096-int8" - }, - { - "name": "faiss_ivf_sq.nlist8192-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 8192, - "quantizer_type": "int8" - }, - "file": "index/nytimes-256-angular/faiss_ivf_sq/nlist8192-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-angular/faiss_ivf_sq/nlist8192-int8" - }, - { - "name": "faiss_ivf_sq.nlist16384-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 16384, - "quantizer_type": "int8" - }, - "file": "index/nytimes-256-angular/faiss_ivf_sq/nlist16384-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/nytimes-256-angular/faiss_ivf_sq/nlist16384-int8" - }, - { - "name": "faiss_flat", - "algo": "faiss_gpu_flat", - "build_param": {}, - "file": "index/nytimes-256-angular/faiss_flat/flat", - "search_params": [ - {} - ], - "search_result_file": "result/nytimes-256-angular/faiss_flat/flat" - }, - - { - "name": "raft_ivf_pq.dimpq128-cluster1024", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/nytimes-256-angular/raft_ivf_pq/dimpq128-cluster1024", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/nytimes-256-angular/raft_ivf_pq/dimpq128-cluster1024" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/nytimes-256-angular/raft_ivf_pq/dimpq128-cluster1024-float-float", - "search_params": [ - { - "k": 10, - "numProbes": 1, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 5, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - 
"numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/nytimes-256-angular/raft_ivf_pq/dimpq128-cluster1024-float-float" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/nytimes-256-angular/raft_ivf_pq/dimpq128-cluster1024-float-half", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/nytimes-256-angular/raft_ivf_pq/dimpq128-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/nytimes-256-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/nytimes-256-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/nytimes-256-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/nytimes-256-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 
1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/nytimes-256-angular/raft_ivf_pq/dimpq64-cluster1024-float-half", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/nytimes-256-angular/raft_ivf_pq/dimpq64-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 32, - "ratio": 1, - "niter": 25 - }, - "file": "index/nytimes-256-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/nytimes-256-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 16, - "ratio": 1, - "niter": 25 - }, - "file": "index/nytimes-256-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/nytimes-256-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/nytimes-256-angular/raft_ivf_pq/dimpq128-cluster1024-half-float", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - 
"internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/nytimes-256-angular/raft_ivf_pq/dimpq128-cluster1024-half-float" - }, - { - "name": "raft_ivf_pq.dimpq512-cluster1024-float-float", - "algo": "raft_ivf_pq", - "build_param": { - "nlist": 1024, - "pq_dim": 512, - "ratio": 1, - "niter": 25 - }, - "file": "index/nytimes-256-angular/raft_ivf_pq/dimpq512-cluster1024-float-float", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/nytimes-256-angular/raft_ivf_pq/dimpq512-cluster1024-float-float" - }, - { - "name": "raft_ivf_flat.nlist1024", - "algo": "raft_ivf_flat", - "build_param": { - "nlist": 1024, - "ratio": 1, - "niter": 25 - }, - "file": "index/nytimes-256-angular/raft_ivf_flat/nlist1024", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/nytimes-256-angular/raft_ivf_flat/nlist1024" - }, - { - "name": "raft_ivf_flat.nlist16384", - "algo": "raft_ivf_flat", - "build_param": { - "nlist": 16384, - "ratio": 2, - "niter": 20 - }, - "file": "index/nytimes-256-angular/raft_ivf_flat/nlist16384", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/nytimes-256-angular/raft_ivf_flat/nlist16384" - }, - - { - "name" : "raft_cagra.dim32", - "algo" : "raft_cagra", - "build_param": { - "index_dim" : 32 - }, - "file" : "index/nytimes-256-angular/raft_cagra/dim32", - "search_params" : [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ], - "search_result_file" : "result/nytimes-256-angular/raft_cagra/dim32" - }, - - { - "name" : "raft_cagra.dim64", - "algo" : "raft_cagra", - "build_param": { - "index_dim" : 64 - }, - "file" : "index/nytimes-256-angular/raft_cagra/dim64", - "search_params" : [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ], - "search_result_file" : "result/nytimes-256-angular/raft_cagra/dim64" - } - ] -} diff --git a/bench/ann/conf/sift-128-euclidean.json b/bench/ann/conf/sift-128-euclidean.json deleted file mode 100644 index 116ea8d557..0000000000 --- a/bench/ann/conf/sift-128-euclidean.json +++ /dev/null @@ -1,498 +0,0 @@ -{ - "dataset": { - "name": "sift-128-euclidean", - "base_file": "sift-128-euclidean/base.fbin", - "query_file": "sift-128-euclidean/query.fbin", - "groundtruth_neighbors_file": "sift-128-euclidean/groundtruth.neighbors.ibin", - "distance": "euclidean" - }, - - "search_basic_param": { - "batch_size": 5000, - "k": 10 - }, - - "index": [ - { - "name": "hnswlib.M12", - "algo": "hnswlib", - 
"build_param": {"M":12, "efConstruction":500, "numThreads":32}, - "file": "sift-128-euclidean/hnswlib/M12", - "search_params": [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ] - }, - { - "name": "hnswlib.M16", - "algo": "hnswlib", - "build_param": {"M":16, "efConstruction":500, "numThreads":32}, - "file": "sift-128-euclidean/hnswlib/M16", - "search_params": [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ] - }, - { - "name": "hnswlib.M24", - "algo": "hnswlib", - "build_param": {"M":24, "efConstruction":500, "numThreads":32}, - "file": "sift-128-euclidean/hnswlib/M24", - "search_params": [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ] - }, - { - "name": "hnswlib.M36", - "algo": "hnswlib", - "build_param": {"M":36, "efConstruction":500, "numThreads":32}, - "file": "sift-128-euclidean/hnswlib/M36", - "search_params": [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ] - }, - { - "name": "raft_bfknn", - "algo": "raft_bfknn", - "build_param": {}, - "file": "sift-128-euclidean/raft_bfknn/bfknn", - "search_params": [{"probe": 1}] - }, - { - "name": "faiss_ivf_flat.nlist1024", - "algo": "faiss_gpu_ivf_flat", - "build_param": {"nlist": 1024}, - "file": "sift-128-euclidean/faiss_ivf_flat/nlist1024", - "search_params": [ - {"nprobe": 1}, - {"nprobe": 5}, - {"nprobe": 10}, - {"nprobe": 50}, - {"nprobe": 100}, - {"nprobe": 200}, - {"nprobe": 500}, - {"nprobe": 1000} - ] - }, - { - "name": "faiss_ivf_flat.nlist2048", - "algo": "faiss_gpu_ivf_flat", - "build_param": {"nlist": 2048}, - "file": "sift-128-euclidean/faiss_ivf_flat/nlist2048", - "search_params": [ - {"nprobe": 1}, - {"nprobe": 5}, - {"nprobe": 10}, - {"nprobe": 50}, - {"nprobe": 100}, - {"nprobe": 200}, - {"nprobe": 500}, - {"nprobe": 1000} - ] - }, - { - "name": "faiss_ivf_flat.nlist4096", - "algo": "faiss_gpu_ivf_flat", - "build_param": {"nlist": 4096}, - "file": "sift-128-euclidean/faiss_ivf_flat/nlist4096", - "search_params": [ - {"nprobe": 1}, - {"nprobe": 5}, - {"nprobe": 10}, - {"nprobe": 50}, - {"nprobe": 100}, - {"nprobe": 200}, - {"nprobe": 500}, - {"nprobe": 1000} - ] - }, - { - "name": "faiss_ivf_flat.nlist8192", - "algo": "faiss_gpu_ivf_flat", - "build_param": {"nlist": 8192}, - "file": "sift-128-euclidean/faiss_ivf_flat/nlist8192", - "search_params": [ - {"nprobe": 1}, - {"nprobe": 5}, - {"nprobe": 10}, - {"nprobe": 50}, - {"nprobe": 100}, - {"nprobe": 200}, - {"nprobe": 500}, - {"nprobe": 1000} - ] - }, - { - "name": "faiss_ivf_flat.nlist16384", - "algo": "faiss_gpu_ivf_flat", - "build_param": {"nlist": 16384}, - "file": 
"sift-128-euclidean/faiss_ivf_flat/nlist16384", - "search_params": [ - {"nprobe": 1}, - {"nprobe": 5}, - {"nprobe": 10}, - {"nprobe": 50}, - {"nprobe": 100}, - {"nprobe": 200}, - {"nprobe": 500}, - {"nprobe": 1000}, - {"nprobe": 2000} - ] - }, - { - "name": "faiss_ivf_pq.M64-nlist1024", - "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist": 1024, "M": 64, "useFloat16": true, "usePrecomputed": true}, - "file": "sift-128-euclidean/faiss_ivf_pq/M64-nlist1024", - "search_params": [ - {"nprobe": 10}, - {"nprobe": 50}, - {"nprobe": 100}, - {"nprobe": 200}, - {"nprobe": 500}, - {"nprobe": 1000} - ] - }, - { - "name": "faiss_ivf_pq.M64-nlist1024.noprecomp", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": false - }, - "file": "sift-128-euclidean/faiss_ivf_pq/M64-nlist1024.noprecomp", - "search_params": [ - {"nprobe": 10}, - {"nprobe": 50}, - {"nprobe": 100}, - {"nprobe": 200}, - {"nprobe": 500}, - {"nprobe": 1000} - ] - }, - { - "name": "faiss_ivf_sq.nlist1024-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist": 1024, "quantizer_type": "fp16"}, - "file": "sift-128-euclidean/faiss_ivf_sq/nlist1024-fp16", - "search_params": [ - {"nprobe": 1}, - {"nprobe": 5}, - {"nprobe": 10}, - {"nprobe": 50}, - {"nprobe": 100}, - {"nprobe": 200}, - {"nprobe": 500}, - {"nprobe": 1000} - ] - }, - { - "name": "faiss_ivf_sq.nlist2048-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist": 2048, "quantizer_type": "fp16"}, - "file": "sift-128-euclidean/faiss_ivf_sq/nlist2048-fp16", - "search_params": [ - {"nprobe": 1}, - {"nprobe": 5}, - {"nprobe": 10}, - {"nprobe": 50}, - {"nprobe": 100}, - {"nprobe": 200}, - {"nprobe": 500}, - {"nprobe": 1000} - ] - }, - { - "name": "faiss_ivf_sq.nlist4096-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist": 4096, "quantizer_type": "fp16"}, - "file": "sift-128-euclidean/faiss_ivf_sq/nlist4096-fp16", - "search_params": [ - {"nprobe": 1}, - {"nprobe": 5}, - {"nprobe": 10}, - {"nprobe": 50}, - {"nprobe": 100}, - {"nprobe": 200}, - {"nprobe": 500}, - {"nprobe": 1000} - ] - }, - { - "name": "faiss_ivf_sq.nlist8192-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist": 8192, "quantizer_type": "fp16"}, - "file": "sift-128-euclidean/faiss_ivf_sq/nlist8192-fp16", - "search_params": [ - {"nprobe": 1}, - {"nprobe": 5}, - {"nprobe": 10}, - {"nprobe": 50}, - {"nprobe": 100}, - {"nprobe": 200}, - {"nprobe": 500}, - {"nprobe": 1000} - ] - }, - { - "name": "faiss_ivf_sq.nlist16384-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist": 16384, "quantizer_type": "fp16"}, - "file": "sift-128-euclidean/faiss_ivf_sq/nlist16384-fp16", - "search_params": [ - {"nprobe": 1}, - {"nprobe": 5}, - {"nprobe": 10}, - {"nprobe": 50}, - {"nprobe": 100}, - {"nprobe": 200}, - {"nprobe": 500}, - {"nprobe": 1000}, - {"nprobe": 2000} - ] - }, - { - "name": "faiss_ivf_sq.nlist1024-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist": 1024, "quantizer_type": "int8"}, - "file": "sift-128-euclidean/faiss_ivf_sq/nlist1024-int8", - "search_params": [ - {"nprobe": 1}, - {"nprobe": 5}, - {"nprobe": 10}, - {"nprobe": 50}, - {"nprobe": 100}, - {"nprobe": 200}, - {"nprobe": 500}, - {"nprobe": 1000} - ] - }, - { - "name": "faiss_ivf_sq.nlist2048-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist": 2048,"quantizer_type": "int8"}, - "file": "sift-128-euclidean/faiss_ivf_sq/nlist2048-int8", - "search_params": [ - {"nprobe": 1}, - {"nprobe": 5}, - {"nprobe": 10}, - {"nprobe": 50}, - {"nprobe": 100}, 
- {"nprobe": 200}, - {"nprobe": 500}, - {"nprobe": 1000} - ] - }, - { - "name": "faiss_ivf_sq.nlist4096-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist": 4096, "quantizer_type": "int8"}, - "file": "sift-128-euclidean/faiss_ivf_sq/nlist4096-int8", - "search_params": [ - {"nprobe": 1}, - {"nprobe": 5}, - {"nprobe": 10}, - {"nprobe": 50}, - {"nprobe": 100}, - {"nprobe": 200}, - {"nprobe": 500}, - {"nprobe": 1000} - ] - }, - { - "name": "faiss_ivf_sq.nlist8192-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist": 8192, "quantizer_type": "int8"}, - "file": "sift-128-euclidean/faiss_ivf_sq/nlist8192-int8", - "search_params": [ - {"nprobe": 1}, - {"nprobe": 5}, - {"nprobe": 10}, - {"nprobe": 50}, - {"nprobe": 100}, - {"nprobe": 200}, - {"nprobe": 500}, - {"nprobe": 1000} - ] - }, - { - "name": "faiss_ivf_sq.nlist16384-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": {"nlist": 16384, "quantizer_type": "int8"}, - "file": "sift-128-euclidean/faiss_ivf_sq/nlist16384-int8", - "search_params": [ - {"nprobe": 1}, - {"nprobe": 5}, - {"nprobe": 10}, - {"nprobe": 50}, - {"nprobe": 100}, - {"nprobe": 200}, - {"nprobe": 500}, - {"nprobe": 1000}, - {"nprobe": 2000} - ] - }, - { - "name": "faiss_flat", - "algo": "faiss_gpu_flat", - "build_param": {}, - "file": "sift-128-euclidean/faiss_flat/flat", - "search_params": [{}] - }, - { - "name": "raft_ivf_pq.dimpq64-bitpq8-cluster1K", - "algo": "raft_ivf_pq", - "build_param": {"niter": 25, "nlist": 1000, "pq_dim": 64, "pq_bits": 8, "ratio": 1}, - "file": "sift-128-euclidean/raft_ivf_pq/dimpq64-bitpq8-cluster1K", - "search_params": [ - { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 1000, "internalDistanceDtype": "half", "smemLutDtype": "half" } - ] - }, - { - "name": "raft_ivf_pq.dimpq128-bitpq6-cluster1K", 
- "algo": "raft_ivf_pq", - "build_param": {"niter": 25, "nlist": 1000, "pq_dim": 128, "pq_bits": 6, "ratio": 1}, - "file": "sift-128-euclidean/raft_ivf_pq/dimpq128-bitpq6-cluster1K", - "search_params": [ - { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "float" }, - { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, - { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half" }, - { "nprobe": 1000, "internalDistanceDtype": "half", "smemLutDtype": "half" } - ] - }, - { - "name": "raft_ivf_flat.nlist1024", - "algo": "raft_ivf_flat", - "build_param": {"nlist": 1024, "ratio": 1, "niter": 25}, - "file": "sift-128-euclidean/raft_ivf_flat/nlist1024", - "search_params": [ - {"nprobe": 1}, - {"nprobe": 5}, - {"nprobe": 10}, - {"nprobe": 50}, - {"nprobe": 100}, - {"nprobe": 200}, - {"nprobe": 500}, - {"nprobe": 1000} - ] - }, - { - "name": "raft_ivf_flat.nlist16384", - "algo": "raft_ivf_flat", - "build_param": {"nlist": 16384, "ratio": 2, "niter": 20}, - "file": "sift-128-euclidean/raft_ivf_flat/nlist16384", - "search_params": [ - {"nprobe": 1}, - {"nprobe": 5}, - {"nprobe": 10}, - {"nprobe": 50}, - {"nprobe": 100}, - {"nprobe": 200}, - {"nprobe": 500}, - {"nprobe": 1000}, - {"nprobe": 2000} - ] - }, - { - "name": "raft_cagra.dim32", - "algo": "raft_cagra", - "build_param": {"index_dim": 32}, - "file": "sift-128-euclidean/raft_cagra/dim32", - "search_params": [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ] - }, - { - "name": "raft_cagra.dim64", - "algo": "raft_cagra", - "build_param": {"index_dim": 64}, - "file": "sift-128-euclidean/raft_cagra/dim64", - "search_params": [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ] - } - ] -} diff --git a/bench/ann/data_export.py b/bench/ann/data_export.py deleted file mode 100644 index 87ca330ed9..0000000000 --- a/bench/ann/data_export.py +++ /dev/null @@ -1,80 +0,0 @@ -# -# Copyright (c) 2023, NVIDIA CORPORATION. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import os -import subprocess -import json - -from pathlib import Path - -def parse_filepaths(fs): - for p in fs: - if p.endswith(".json") and os.path.exists(p): - yield p - else: - for f in Path(p).rglob('*.json'): - yield f.as_posix() - -def export_results(output_filepath, recompute, groundtruth_filepath, - result_filepath): - print(f"Writing output file to: {output_filepath}") - - parsed_filepaths = parse_filepaths(result_filepaths) - - with open(output_filepath, 'w') as out: - out.write("Algo,Recall,QPS\n") - - for fp in parsed_filepaths: - with open(fp, 'r') as f: - data = json.load(f) - for benchmark_case in data["benchmarks"]: - algo = benchmark_case["name"] - recall = benchmark_case["Recall"] - qps = benchmark_case["items_per_second"] - out.write(f"{algo},{recall},{qps}\n") - - -def main(): - parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument("--output", help="Path to the CSV output file", - required=True) - parser.add_argument("--recompute", action="store_true", - help="Recompute metrics") - parser.add_argument("--dataset", - help="Name of the dataset to export results for", - default="glove-100-inner") - parser.add_argument( - "--dataset-path", - help="path to dataset folder", - default=os.path.join(os.getenv("RAFT_HOME"), - "bench", "ann", "data") - ) - - args, result_filepaths = parser.parse_known_args() - - # if nothing is provided - if len(result_filepaths) == 0: - raise ValueError("No filepaths to results were provided") - - groundtruth_filepath = os.path.join(args.dataset_path, args.dataset, - "groundtruth.neighbors.ibin") - export_results(args.output, args.recompute, groundtruth_filepath, - result_filepath) - - -if __name__ == "__main__": - main() diff --git a/bench/ann/get_dataset.py b/bench/ann/get_dataset.py deleted file mode 100644 index a175384dc3..0000000000 --- a/bench/ann/get_dataset.py +++ /dev/null @@ -1,93 +0,0 @@ -# -# Copyright (c) 2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import argparse -import os -import subprocess -from urllib.request import urlretrieve - - -def get_dataset_path(name, ann_bench_data_path): - if not os.path.exists(ann_bench_data_path): - os.mkdir(ann_bench_data_path) - return os.path.join(ann_bench_data_path, f"{name}.hdf5") - - -def download_dataset(url, path): - if not os.path.exists(path): - print(f"downloading {url} -> {path}...") - urlretrieve(url, path) - - -def convert_hdf5_to_fbin(path, normalize): - ann_bench_scripts_dir = os.path.join(os.getenv("RAFT_HOME"), - "cpp/bench/ann/scripts") - ann_bench_scripts_path = os.path.join(ann_bench_scripts_dir, - "hdf5_to_fbin.py") - if normalize and "angular" in path: - p = subprocess.Popen(["python", ann_bench_scripts_path, "-n", - "%s" % path]) - else: - p = subprocess.Popen(["python", ann_bench_scripts_path, - "%s" % path]) - p.wait() - - -def move(name, ann_bench_data_path): - if "angular" in name: - new_name = name.replace("angular", "inner") - else: - new_name = name - new_path = os.path.join(ann_bench_data_path, new_name) - if not os.path.exists(new_path): - os.mkdir(new_path) - for bin_name in ["base.fbin", "query.fbin", "groundtruth.neighbors.ibin", - "groundtruth.distances.fbin"]: - os.rename(f"{ann_bench_data_path}/{name}.{bin_name}", - f"{new_path}/{bin_name}") - - -def download(name, normalize, ann_bench_data_path): - path = get_dataset_path(name, ann_bench_data_path) - try: - url = f"http://ann-benchmarks.com/{name}.hdf5" - download_dataset(url, path) - - convert_hdf5_to_fbin(path, normalize) - - move(name, ann_bench_data_path) - except Exception: - print(f"Cannot download {url}") - raise - - -def main(): - parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument("--dataset", help="dataset to download", - default="glove-100-angular") - parser.add_argument("--dataset-path", help="path to download dataset", - default=os.path.join(os.getenv("RAFT_HOME"), - "bench", "ann", "data")) - parser.add_argument("--normalize", - help="normalize cosine distance to inner product", - action="store_true") - args = parser.parse_args() - - download(args.dataset, args.normalize, args.dataset_path) - - -if __name__ == "__main__": - main() diff --git a/bench/ann/plot.py b/bench/ann/plot.py deleted file mode 100644 index 0020e398a9..0000000000 --- a/bench/ann/plot.py +++ /dev/null @@ -1,240 +0,0 @@ -# -# Copyright (c) 2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# This script is inspired by -# 1: https://github.com/erikbern/ann-benchmarks/blob/main/plot.py -# 2: https://github.com/erikbern/ann-benchmarks/blob/main/ann_benchmarks/plotting/utils.py -# 3: https://github.com/erikbern/ann-benchmarks/blob/main/ann_benchmarks/plotting/metrics.py -# Licence: https://github.com/erikbern/ann-benchmarks/blob/main/LICENSE - -import matplotlib as mpl - -mpl.use("Agg") # noqa -import argparse -import itertools -import matplotlib.pyplot as plt -import numpy as np -import os - - - -metrics = { - "k-nn": { - "description": "Recall", - "worst": float("-inf"), - "lim": [0.0, 1.03], - }, - "qps": { - "description": "Queries per second (1/s)", - "worst": float("-inf"), - } -} - - -def generate_n_colors(n): - vs = np.linspace(0.3, 0.9, 7) - colors = [(0.9, 0.4, 0.4, 1.0)] - - def euclidean(a, b): - return sum((x - y) ** 2 for x, y in zip(a, b)) - - while len(colors) < n: - new_color = max(itertools.product(vs, vs, vs), key=lambda a: min(euclidean(a, b) for b in colors)) - colors.append(new_color + (1.0,)) - return colors - - -def create_linestyles(unique_algorithms): - colors = dict(zip(unique_algorithms, generate_n_colors(len(unique_algorithms)))) - linestyles = dict((algo, ["--", "-.", "-", ":"][i % 4]) for i, algo in enumerate(unique_algorithms)) - markerstyles = dict((algo, ["+", "<", "o", "*", "x"][i % 5]) for i, algo in enumerate(unique_algorithms)) - faded = dict((algo, (r, g, b, 0.3)) for algo, (r, g, b, a) in colors.items()) - return dict((algo, (colors[algo], faded[algo], linestyles[algo], markerstyles[algo])) for algo in unique_algorithms) - - -def get_up_down(metric): - if metric["worst"] == float("inf"): - return "down" - return "up" - - -def get_left_right(metric): - if metric["worst"] == float("inf"): - return "left" - return "right" - - -def get_plot_label(xm, ym): - template = "%(xlabel)s-%(ylabel)s tradeoff - %(updown)s and" " to the %(leftright)s is better" - return template % { - "xlabel": xm["description"], - "ylabel": ym["description"], - "updown": get_up_down(ym), - "leftright": get_left_right(xm), - } - - -def create_pointset(data, xn, yn): - xm, ym = (metrics[xn], metrics[yn]) - rev_y = -1 if ym["worst"] < 0 else 1 - rev_x = -1 if xm["worst"] < 0 else 1 - data.sort(key=lambda t: (rev_y * t[-1], rev_x * t[-2])) - - axs, ays, als = [], [], [] - # Generate Pareto frontier - xs, ys, ls = [], [], [] - last_x = xm["worst"] - comparator = (lambda xv, lx: xv > lx) if last_x < 0 else (lambda xv, lx: xv < lx) - for algo_name, xv, yv in data: - if not xv or not yv: - continue - axs.append(xv) - ays.append(yv) - als.append(algo_name) - if comparator(xv, last_x): - last_x = xv - xs.append(xv) - ys.append(yv) - ls.append(algo_name) - return xs, ys, ls, axs, ays, als - - -def create_plot(all_data, raw, x_scale, y_scale, fn_out, linestyles): - xn = "k-nn" - yn = "qps" - xm, ym = (metrics[xn], metrics[yn]) - # Now generate each plot - handles = [] - labels = [] - plt.figure(figsize=(12, 9)) - - # Sorting by mean y-value helps aligning plots with labels - def mean_y(algo): - xs, ys, ls, axs, ays, als = create_pointset(all_data[algo], xn, yn) - return -np.log(np.array(ys)).mean() - - # Find range for logit x-scale - min_x, max_x = 1, 0 - for algo in sorted(all_data.keys(), key=mean_y): - xs, ys, ls, axs, ays, als = create_pointset(all_data[algo], xn, yn) - min_x = min([min_x] + [x for x in xs if x > 0]) - max_x = max([max_x] + [x for x in xs if x < 1]) - color, faded, linestyle, marker = linestyles[algo] - (handle,) = plt.plot( 
- xs, ys, "-", label=algo, color=color, ms=7, mew=3, lw=3, marker=marker - ) - handles.append(handle) - if raw: - (handle2,) = plt.plot( - axs, ays, "-", label=algo, color=faded, ms=5, mew=2, lw=2, marker=marker - ) - labels.append(algo) - - ax = plt.gca() - ax.set_ylabel(ym["description"]) - ax.set_xlabel(xm["description"]) - # Custom scales of the type --x-scale a3 - if x_scale[0] == "a": - alpha = float(x_scale[1:]) - - def fun(x): - return 1 - (1 - x) ** (1 / alpha) - - def inv_fun(x): - return 1 - (1 - x) ** alpha - - ax.set_xscale("function", functions=(fun, inv_fun)) - if alpha <= 3: - ticks = [inv_fun(x) for x in np.arange(0, 1.2, 0.2)] - plt.xticks(ticks) - if alpha > 3: - from matplotlib import ticker - - ax.xaxis.set_major_formatter(ticker.LogitFormatter()) - # plt.xticks(ticker.LogitLocator().tick_values(min_x, max_x)) - plt.xticks([0, 1 / 2, 1 - 1e-1, 1 - 1e-2, 1 - 1e-3, 1 - 1e-4, 1]) - # Other x-scales - else: - ax.set_xscale(x_scale) - ax.set_yscale(y_scale) - ax.set_title(get_plot_label(xm, ym)) - plt.gca().get_position() - # plt.gca().set_position([box.x0, box.y0, box.width * 0.8, box.height]) - ax.legend(handles, labels, loc="center left", bbox_to_anchor=(1, 0.5), prop={"size": 9}) - plt.grid(visible=True, which="major", color="0.65", linestyle="-") - plt.setp(ax.get_xminorticklabels(), visible=True) - - # Logit scale has to be a subset of (0,1) - if "lim" in xm and x_scale != "logit": - x0, x1 = xm["lim"] - plt.xlim(max(x0, 0), min(x1, 1)) - elif x_scale == "logit": - plt.xlim(min_x, max_x) - if "lim" in ym: - plt.ylim(ym["lim"]) - - # Workaround for bug https://github.com/matplotlib/matplotlib/issues/6789 - ax.spines["bottom"]._adjust_location() - - plt.savefig(fn_out, bbox_inches="tight") - plt.close() - - -def load_all_results(result_filepath): - results = dict() - with open(result_filepath, 'r') as f: - for line in f.readlines()[1:]: - split_lines = line.split(',') - algo_name = split_lines[0].split('.')[0] - if algo_name not in results: - results[algo_name] = [] - results[algo_name].append([algo_name, float(split_lines[1]), - float(split_lines[2])]) - return results - - -def main(): - parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument("--result-csv", help="Path to CSV Results", required=True) - parser.add_argument("--output", help="Path to the PNG output file", - default=f"{os.getcwd()}/out.png") - parser.add_argument( - "--x-scale", - help="Scale to use when drawing the X-axis. \ - Typically linear, logit or a2", - default="linear" - ) - parser.add_argument( - "--y-scale", - help="Scale to use when drawing the Y-axis", - choices=["linear", "log", "symlog", "logit"], - default="linear", - ) - parser.add_argument( - "--raw", help="Show raw results (not just Pareto frontier) in faded colours", action="store_true" - ) - args = parser.parse_args() - - print(f"writing output to {args.output}") - - results = load_all_results(args.result_csv) - linestyles = create_linestyles(sorted(results.keys())) - - create_plot(results, args.raw, args.x_scale, args.y_scale, args.output, linestyles) - - -if __name__ == "__main__": - main() diff --git a/bench/ann/run.py b/bench/ann/run.py deleted file mode 100644 index 60b9a012ad..0000000000 --- a/bench/ann/run.py +++ /dev/null @@ -1,234 +0,0 @@ -# -# Copyright (c) 2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import json -import os -import subprocess -import yaml - - -def positive_int(input_str: str) -> int: - try: - i = int(input_str) - if i < 1: - raise ValueError - except ValueError: - raise argparse.ArgumentTypeError(f"{input_str} is not a positive integer") - - return i - -def validate_algorithm(algos_conf, algo): - algos_conf_keys = set(algos_conf.keys()) - return algo in algos_conf_keys and not algos_conf[algo]["disabled"] - - -def find_executable(algos_conf, algo): - executable = algos_conf[algo]["executable"] - conda_path = os.path.join(os.getenv("CONDA_PREFIX"), "bin", "ann", - executable) - build_path = os.path.join(os.getenv("RAFT_HOME"), "cpp", "build", executable) - if os.path.exists(conda_path): - return (executable, conda_path) - elif os.path.exists(build_path): - return (executable, build_path) - else: - raise FileNotFoundError(executable) - - -def run_build_and_search(conf_filename, conf_file, executables_to_run, - force, conf_filedir, build, search, k, batch_size): - for executable, ann_executable_path in executables_to_run.keys(): - # Need to write temporary configuration - temp_conf_filename = f"temporary_executable_{conf_filename}" - temp_conf_filepath = os.path.join(conf_filedir, temp_conf_filename) - with open(temp_conf_filepath, "w") as f: - temp_conf = dict() - temp_conf["dataset"] = conf_file["dataset"] - temp_conf["search_basic_param"] = conf_file["search_basic_param"] - temp_conf["index"] = executables_to_run[(executable, - ann_executable_path)]["index"] - json.dump(temp_conf, f) - - if build: - if force: - p = subprocess.Popen([ann_executable_path, "--build", "--overwrite", - temp_conf_filepath]) - p.wait() - else: - p = subprocess.Popen([ann_executable_path, "--build", - temp_conf_filepath]) - p.wait() - - if search: - legacy_result_folder = "result/" + temp_conf["dataset"]["name"] - os.makedirs(legacy_result_folder, exist_ok=True) - p = subprocess.Popen([ - ann_executable_path, - "--search", - "--benchmark_counters_tabular", - "--benchmark_out_format=json", - "--override_kv=k:%s" % k, - "--override_kv=n_queries:%s" % batch_size, - f"--benchmark_out={legacy_result_folder}/{executable}.json", - temp_conf_filepath]) - p.wait() - - os.remove(temp_conf_filepath) - - -def main(): - scripts_path = os.path.dirname(os.path.realpath(__file__)) - # Read list of allowed algorithms - with open(f"{scripts_path}/algos.yaml", "r") as f: - algos_conf = yaml.safe_load(f) - - parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - - parser.add_argument( - "-k", "--count", default=10, type=positive_int, help="the number of nearest neighbors to search for" - ) - parser.add_argument( - "-bs", "--batch-size", default=10000, type=positive_int, help="number of query vectors to use in each query trial" - ) - parser.add_argument( - "--configuration", - help="path to configuration file for a dataset", - ) - parser.add_argument( - "--dataset", - help="dataset whose configuration file will be used", - default="glove-100-inner" - ) - parser.add_argument( - "--dataset-path", - help="path to dataset folder", - 
default=os.path.join(os.getenv("RAFT_HOME"), - "bench", "ann", "data") - ) - parser.add_argument( - "--build", - action="store_true" - ) - parser.add_argument( - "--search", - action="store_true" - ) - parser.add_argument("--algorithms", - help="run only comma separated list of named \ - algorithms", - default=None) - parser.add_argument("--indices", - help="run only comma separated list of named indices. \ - parameter `algorithms` is ignored", - default=None) - parser.add_argument("-f", "--force", - help="re-run algorithms even if their results \ - already exist", - action="store_true") - - args = parser.parse_args() - - # If both build and search are not provided, - # run both - if not args.build and not args.search: - build = True - search = True - else: - build = args.build - search = args.search - - k = args.count - batch_size = args.batch_size - - # Read configuration file associated to dataset - if args.configuration: - conf_filepath = args.configuration - else: - conf_filepath = os.path.join(scripts_path, "conf", f"{args.dataset}.json") - conf_filename = conf_filepath.split("/")[-1] - conf_filedir = "/".join(conf_filepath.split("/")[:-1]) - dataset_name = conf_filename.replace(".json", "") - dataset_path = os.path.realpath(os.path.join(args.dataset_path, dataset_name)) - if not os.path.exists(conf_filepath): - raise FileNotFoundError(conf_filename) - - with open(conf_filepath, "r") as f: - conf_file = json.load(f) - - # Replace base, query to dataset-path - conf_file["dataset"]["base_file"] = os.path.join(dataset_path, "base.fbin") - conf_file["dataset"]["query_file"] = os.path.join(dataset_path, "query.fbin") - conf_file["dataset"]["groundtruth_neighbors_file"] = os.path.join(dataset_path, "groundtruth.neighbors.ibin") - # Ensure base and query files exist for dataset - if not os.path.exists(conf_file["dataset"]["base_file"]): - raise FileNotFoundError(conf_file["dataset"]["base_file"]) - if not os.path.exists(conf_file["dataset"]["query_file"]): - raise FileNotFoundError(conf_file["dataset"]["query_file"]) - - executables_to_run = dict() - # At least one named index should exist in config file - if args.indices: - indices = set(args.indices.split(",")) - # algo associated with index should still be present in algos.yaml - # and enabled - for index in conf_file["index"]: - curr_algo = index["algo"] - if index["name"] in indices and \ - validate_algorithm(algos_conf, curr_algo): - executable_path = find_executable(algos_conf, curr_algo) - if executable_path not in executables_to_run: - executables_to_run[executable_path] = {"index": []} - executables_to_run[executable_path]["index"].append(index) - - # switch to named algorithms if indices parameter is not supplied - elif args.algorithms: - algorithms = set(args.algorithms.split(",")) - # pick out algorithms from conf file that exist - # and are enabled in algos.yaml - for index in conf_file["index"]: - curr_algo = index["algo"] - if curr_algo in algorithms and \ - validate_algorithm(algos_conf, curr_algo): - executable_path = find_executable(algos_conf, curr_algo) - if executable_path not in executables_to_run: - executables_to_run[executable_path] = {"index": []} - executables_to_run[executable_path]["index"].append(index) - - # default, try to run all available algorithms - else: - for index in conf_file["index"]: - curr_algo = index["algo"] - if validate_algorithm(algos_conf, curr_algo): - executable_path = find_executable(algos_conf, curr_algo) - if executable_path not in executables_to_run: - 
executables_to_run[executable_path] = {"index": []} - executables_to_run[executable_path]["index"].append(index) - - # Replace build, search to dataset path - for executable_path in executables_to_run: - for pos, index in enumerate(executables_to_run[executable_path]["index"]): - index["file"] = os.path.join(dataset_path, "index", index["name"]) - index["search_result_file"] = \ - os.path.join(dataset_path, "result", index["name"]) - executables_to_run[executable_path]["index"][pos] = index - - run_build_and_search(conf_filename, conf_file, executables_to_run, - args.force, conf_filedir, build, search, k, batch_size) - - -if __name__ == "__main__": - main() diff --git a/bench/ann/split_groundtruth.py b/bench/ann/split_groundtruth.py deleted file mode 100644 index cd67d9c8b8..0000000000 --- a/bench/ann/split_groundtruth.py +++ /dev/null @@ -1,47 +0,0 @@ -# -# Copyright (c) 2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import os -import subprocess - - -def split_groundtruth(groundtruth_filepath): - ann_bench_scripts_dir = os.path.join(os.getenv("RAFT_HOME"), - "cpp/bench/ann/scripts") - ann_bench_scripts_path = os.path.join(ann_bench_scripts_dir, - "split_groundtruth.pl") - pwd = os.getcwd() - os.chdir("/".join(groundtruth_filepath.split("/")[:-1])) - groundtruth_filename = groundtruth_filepath.split("/")[-1] - p = subprocess.Popen([ann_bench_scripts_path, groundtruth_filename, - "groundtruth"]) - p.wait() - os.chdir(pwd) - - -def main(): - parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument("--groundtruth", - help="Path to billion-scale dataset groundtruth file", - required=True) - args = parser.parse_args() - - split_groundtruth(args.groundtruth) - - -if __name__ == "__main__": - main() diff --git a/python/raft-ann-bench/raft-ann-bench/data_export/__main__.py b/python/raft-ann-bench/raft-ann-bench/data_export/__main__.py index 87ca330ed9..8f04d83925 100644 --- a/python/raft-ann-bench/raft-ann-bench/data_export/__main__.py +++ b/python/raft-ann-bench/raft-ann-bench/data_export/__main__.py @@ -48,6 +48,10 @@ def export_results(output_filepath, recompute, groundtruth_filepath, def main(): + if "RAPIDS_DATASET_ROOT_DIR" in os.environ: + default_dataset_path = os.getenv("RAPIDS_DATASET_ROOT_DIR") + else: + default_dataset_path = os.path.join(call_path, "datasets/") parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument("--output", help="Path to the CSV output file", @@ -60,8 +64,7 @@ def main(): parser.add_argument( "--dataset-path", help="path to dataset folder", - default=os.path.join(os.getenv("RAFT_HOME"), - "bench", "ann", "data") + default=default_dataset_path ) args, result_filepaths = parser.parse_known_args() diff --git a/python/raft-ann-bench/raft-ann-bench/get_dataset/__main__.py b/python/raft-ann-bench/raft-ann-bench/get_dataset/__main__.py index a175384dc3..affac08307 100644 --- 
a/python/raft-ann-bench/raft-ann-bench/get_dataset/__main__.py +++ b/python/raft-ann-bench/raft-ann-bench/get_dataset/__main__.py @@ -32,10 +32,7 @@ def download_dataset(url, path): def convert_hdf5_to_fbin(path, normalize): - ann_bench_scripts_dir = os.path.join(os.getenv("RAFT_HOME"), - "cpp/bench/ann/scripts") - ann_bench_scripts_path = os.path.join(ann_bench_scripts_dir, - "hdf5_to_fbin.py") + ann_bench_scripts_path = "hdf5_to_fbin.py" if normalize and "angular" in path: p = subprocess.Popen(["python", ann_bench_scripts_path, "-n", "%s" % path]) @@ -74,13 +71,16 @@ def download(name, normalize, ann_bench_data_path): def main(): + if "RAPIDS_DATASET_ROOT_DIR" in os.environ: + default_dataset_path = os.getenv("RAPIDS_DATASET_ROOT_DIR") + else: + default_dataset_path = os.path.join(call_path, "datasets/") parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument("--dataset", help="dataset to download", default="glove-100-angular") parser.add_argument("--dataset-path", help="path to download dataset", - default=os.path.join(os.getenv("RAFT_HOME"), - "bench", "ann", "data")) + default=default_dataset_path) parser.add_argument("--normalize", help="normalize cosine distance to inner product", action="store_true") diff --git a/python/raft-ann-bench/raft-ann-bench/run/__main__.py b/python/raft-ann-bench/raft-ann-bench/run/__main__.py index 60b9a012ad..eded60a3b0 100644 --- a/python/raft-ann-bench/raft-ann-bench/run/__main__.py +++ b/python/raft-ann-bench/raft-ann-bench/run/__main__.py @@ -41,8 +41,10 @@ def find_executable(algos_conf, algo): executable) build_path = os.path.join(os.getenv("RAFT_HOME"), "cpp", "build", executable) if os.path.exists(conda_path): + print("Using RAFT bench found in conda environment: ") return (executable, conda_path) elif os.path.exists(build_path): + print(f"Using RAFT bench from repository specified in {build_path}: ") return (executable, build_path) else: raise FileNotFoundError(executable) @@ -91,10 +93,21 @@ def run_build_and_search(conf_filename, conf_file, executables_to_run, def main(): scripts_path = os.path.dirname(os.path.realpath(__file__)) + call_path = os.getcwd() # Read list of allowed algorithms - with open(f"{scripts_path}/algos.yaml", "r") as f: + try: + import pylibraft + algo_file = "algos.yaml" + except ImportError: + algo_file = "algos_cpu.yaml" + with open(f"{scripts_path}/{algo_file}", "r") as f: algos_conf = yaml.safe_load(f) + if "RAPIDS_DATASET_ROOT_DIR" in os.environ: + default_dataset_path = os.getenv("RAPIDS_DATASET_ROOT_DIR") + else: + default_dataset_path = os.path.join(call_path, "datasets/") + parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter) @@ -116,8 +129,7 @@ def main(): parser.add_argument( "--dataset-path", help="path to dataset folder", - default=os.path.join(os.getenv("RAFT_HOME"), - "bench", "ann", "data") + default=default_dataset_path ) parser.add_argument( "--build", diff --git a/python/raft-ann-bench/raft-ann-bench/split_groundtruth/__main__.py b/python/raft-ann-bench/raft-ann-bench/split_groundtruth/__main__.py index cd67d9c8b8..ee5f61ec9e 100644 --- a/python/raft-ann-bench/raft-ann-bench/split_groundtruth/__main__.py +++ b/python/raft-ann-bench/raft-ann-bench/split_groundtruth/__main__.py @@ -19,10 +19,7 @@ def split_groundtruth(groundtruth_filepath): - ann_bench_scripts_dir = os.path.join(os.getenv("RAFT_HOME"), - "cpp/bench/ann/scripts") - ann_bench_scripts_path = os.path.join(ann_bench_scripts_dir, - "split_groundtruth.pl") 
+ ann_bench_scripts_path = "split_groundtruth.pl" pwd = os.getcwd() os.chdir("/".join(groundtruth_filepath.split("/")[:-1])) groundtruth_filename = groundtruth_filepath.split("/")[-1] From 82f195ec3c4112a5738a153d9d06724cee090426 Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 25 Aug 2023 11:18:45 -0700 Subject: [PATCH 23/70] write build,search results --- bench/ann/run.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/bench/ann/run.py b/bench/ann/run.py index e64148abd8..8da3eadc3b 100644 --- a/bench/ann/run.py +++ b/bench/ann/run.py @@ -41,9 +41,9 @@ def find_executable(algos_conf, algo): executable) build_path = os.path.join(os.getenv("RAFT_HOME"), "cpp", "build", executable) if os.path.exists(conda_path): - return (executable, conda_path) + return (executable, conda_path, algo) elif os.path.exists(build_path): - return (executable, build_path) + return (executable, build_path, algo) else: raise FileNotFoundError(executable) @@ -51,7 +51,7 @@ def find_executable(algos_conf, algo): def run_build_and_search(conf_file, conf_filename, conf_filedir, executables_to_run, dataset_path, force, build, search, k, batch_size): - for executable, ann_executable_path in executables_to_run.keys(): + for executable, ann_executable_path, algo in executables_to_run.keys(): # Need to write temporary configuration temp_conf_filename = f"temporary_{conf_filename}" temp_conf_filepath = os.path.join(conf_filedir, temp_conf_filename) @@ -60,13 +60,20 @@ def run_build_and_search(conf_file, conf_filename, conf_filedir, temp_conf["dataset"] = conf_file["dataset"] temp_conf["search_basic_param"] = conf_file["search_basic_param"] temp_conf["index"] = executables_to_run[(executable, - ann_executable_path)]["index"] + ann_executable_path, + algo)]["index"] json.dump(temp_conf, f) + legacy_result_folder = os.path.join(dataset_path, conf_file['dataset']['name'], 'result') + os.makedirs(legacy_result_folder, exist_ok=True) if build: + build_folder = os.path.join(legacy_result_folder, "build") + os.makedirs(build_folder, exist_ok=True) cmd = [ann_executable_path, "--build", - "--data_prefix="+dataset_path] + "--data_prefix="+dataset_path, + "--benchmark_out_format=csv", + f"--benchmark_out={os.path.join(build_folder, f'{algo}.csv')}"] if force: cmd = cmd + ["--overwrite"] cmd = cmd + [temp_conf_filepath] @@ -75,16 +82,16 @@ def run_build_and_search(conf_file, conf_filename, conf_filedir, p.wait() if search: - legacy_result_folder = os.path.join(dataset_path, conf_file['dataset']['name'], 'result') - os.makedirs(legacy_result_folder, exist_ok=True) + search_folder = os.path.join(legacy_result_folder, "search") + os.makedirs(search_folder, exist_ok=True) cmd = [ann_executable_path, "--search", "--data_prefix="+dataset_path, "--benchmark_counters_tabular", - "--benchmark_out_format=csv", "--override_kv=k:%s" % k, "--override_kv=n_queries:%s" % batch_size, - f"--benchmark_out={os.path.join(dataset_path, conf_file['dataset']['name'], 'result', f'{executable}.csv')}"] + "--benchmark_out_format=csv", + f"--benchmark_out={os.path.join(search_folder, f'{algo}.csv')}"] if force: cmd = cmd + ["--overwrite"] cmd = cmd + [temp_conf_filepath] From be6eb5677580236fcf60ff92334591cbc9ff7131 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Fri, 25 Aug 2023 16:23:06 -0500 Subject: [PATCH 24/70] FIX PEP8 fixes --- .../raft-ann-bench/data_export/__main__.py | 51 +++--- .../raft-ann-bench/get_dataset/__main__.py | 46 +++-- .../get_dataset/fbin_to_f16bin.py | 6 +- 
.../get_dataset/hdf5_to_fbin.py | 6 +- .../raft-ann-bench/plot/__main__.py | 122 +++++++++---- .../raft-ann-bench/run/__main__.py | 170 ++++++++++++------ .../split_groundtruth/__main__.py | 16 +- 7 files changed, 281 insertions(+), 136 deletions(-) diff --git a/python/raft-ann-bench/raft-ann-bench/data_export/__main__.py b/python/raft-ann-bench/raft-ann-bench/data_export/__main__.py index 8f04d83925..80e3dcad93 100644 --- a/python/raft-ann-bench/raft-ann-bench/data_export/__main__.py +++ b/python/raft-ann-bench/raft-ann-bench/data_export/__main__.py @@ -14,31 +14,32 @@ # limitations under the License. import argparse -import os -import subprocess import json - +import os from pathlib import Path + def parse_filepaths(fs): for p in fs: if p.endswith(".json") and os.path.exists(p): yield p else: - for f in Path(p).rglob('*.json'): + for f in Path(p).rglob("*.json"): yield f.as_posix() -def export_results(output_filepath, recompute, groundtruth_filepath, - result_filepath): + +def export_results( + output_filepath, recompute, groundtruth_filepath, result_filepaths +): print(f"Writing output file to: {output_filepath}") parsed_filepaths = parse_filepaths(result_filepaths) - with open(output_filepath, 'w') as out: + with open(output_filepath, "w") as out: out.write("Algo,Recall,QPS\n") for fp in parsed_filepaths: - with open(fp, 'r') as f: + with open(fp, "r") as f: data = json.load(f) for benchmark_case in data["benchmarks"]: algo = benchmark_case["name"] @@ -48,23 +49,29 @@ def export_results(output_filepath, recompute, groundtruth_filepath, def main(): + call_path = os.getcwd() if "RAPIDS_DATASET_ROOT_DIR" in os.environ: default_dataset_path = os.getenv("RAPIDS_DATASET_ROOT_DIR") else: default_dataset_path = os.path.join(call_path, "datasets/") parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument("--output", help="Path to the CSV output file", - required=True) - parser.add_argument("--recompute", action="store_true", - help="Recompute metrics") - parser.add_argument("--dataset", - help="Name of the dataset to export results for", - default="glove-100-inner") + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument( + "--output", help="Path to the CSV output file", required=True + ) + parser.add_argument( + "--recompute", action="store_true", help="Recompute metrics" + ) + parser.add_argument( + "--dataset", + help="Name of the dataset to export results for", + default="glove-100-inner", + ) parser.add_argument( "--dataset-path", help="path to dataset folder", - default=default_dataset_path + default=default_dataset_path, ) args, result_filepaths = parser.parse_known_args() @@ -73,10 +80,12 @@ def main(): if len(result_filepaths) == 0: raise ValueError("No filepaths to results were provided") - groundtruth_filepath = os.path.join(args.dataset_path, args.dataset, - "groundtruth.neighbors.ibin") - export_results(args.output, args.recompute, groundtruth_filepath, - result_filepath) + groundtruth_filepath = os.path.join( + args.dataset_path, args.dataset, "groundtruth.neighbors.ibin" + ) + export_results( + args.output, args.recompute, groundtruth_filepath, result_filepaths + ) if __name__ == "__main__": diff --git a/python/raft-ann-bench/raft-ann-bench/get_dataset/__main__.py b/python/raft-ann-bench/raft-ann-bench/get_dataset/__main__.py index affac08307..605146a84e 100644 --- a/python/raft-ann-bench/raft-ann-bench/get_dataset/__main__.py +++ b/python/raft-ann-bench/raft-ann-bench/get_dataset/__main__.py @@ 
-34,11 +34,11 @@ def download_dataset(url, path): def convert_hdf5_to_fbin(path, normalize): ann_bench_scripts_path = "hdf5_to_fbin.py" if normalize and "angular" in path: - p = subprocess.Popen(["python", ann_bench_scripts_path, "-n", - "%s" % path]) + p = subprocess.Popen( + ["python", ann_bench_scripts_path, "-n", "%s" % path] + ) else: - p = subprocess.Popen(["python", ann_bench_scripts_path, - "%s" % path]) + p = subprocess.Popen(["python", ann_bench_scripts_path, "%s" % path]) p.wait() @@ -50,10 +50,16 @@ def move(name, ann_bench_data_path): new_path = os.path.join(ann_bench_data_path, new_name) if not os.path.exists(new_path): os.mkdir(new_path) - for bin_name in ["base.fbin", "query.fbin", "groundtruth.neighbors.ibin", - "groundtruth.distances.fbin"]: - os.rename(f"{ann_bench_data_path}/{name}.{bin_name}", - f"{new_path}/{bin_name}") + for bin_name in [ + "base.fbin", + "query.fbin", + "groundtruth.neighbors.ibin", + "groundtruth.distances.fbin", + ]: + os.rename( + f"{ann_bench_data_path}/{name}.{bin_name}", + f"{new_path}/{bin_name}", + ) def download(name, normalize, ann_bench_data_path): @@ -71,19 +77,27 @@ def download(name, normalize, ann_bench_data_path): def main(): + call_path = os.getcwd() if "RAPIDS_DATASET_ROOT_DIR" in os.environ: default_dataset_path = os.getenv("RAPIDS_DATASET_ROOT_DIR") else: default_dataset_path = os.path.join(call_path, "datasets/") parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument("--dataset", help="dataset to download", - default="glove-100-angular") - parser.add_argument("--dataset-path", help="path to download dataset", - default=default_dataset_path) - parser.add_argument("--normalize", - help="normalize cosine distance to inner product", - action="store_true") + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument( + "--dataset", help="dataset to download", default="glove-100-angular" + ) + parser.add_argument( + "--dataset-path", + help="path to download dataset", + default=default_dataset_path, + ) + parser.add_argument( + "--normalize", + help="normalize cosine distance to inner product", + action="store_true", + ) args = parser.parse_args() download(args.dataset, args.normalize, args.dataset_path) diff --git a/python/raft-ann-bench/raft-ann-bench/get_dataset/fbin_to_f16bin.py b/python/raft-ann-bench/raft-ann-bench/get_dataset/fbin_to_f16bin.py index d3a929d581..ee7410e0cc 100755 --- a/python/raft-ann-bench/raft-ann-bench/get_dataset/fbin_to_f16bin.py +++ b/python/raft-ann-bench/raft-ann-bench/get_dataset/fbin_to_f16bin.py @@ -14,10 +14,10 @@ # limitations under the License. 
-from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function + import sys + import numpy as np diff --git a/python/raft-ann-bench/raft-ann-bench/get_dataset/hdf5_to_fbin.py b/python/raft-ann-bench/raft-ann-bench/get_dataset/hdf5_to_fbin.py index 04bdbb5720..ba853c63f5 100755 --- a/python/raft-ann-bench/raft-ann-bench/get_dataset/hdf5_to_fbin.py +++ b/python/raft-ann-bench/raft-ann-bench/get_dataset/hdf5_to_fbin.py @@ -15,8 +15,9 @@ import sys -import numpy as np + import h5py +import numpy as np def normalize(x): @@ -68,7 +69,8 @@ def write_bin(fname, data): query = normalize(query) elif hdf5.attrs["distance"] == "angular": print( - "warning: input has angular distance, specify -n to normalize base/query set!\n" + "warning: input has angular distance, ", + "specify -n to normalize base/query set!\n", ) output_fname = fname_prefix + ".base.fbin" diff --git a/python/raft-ann-bench/raft-ann-bench/plot/__main__.py b/python/raft-ann-bench/raft-ann-bench/plot/__main__.py index 0020e398a9..5f81019d8c 100644 --- a/python/raft-ann-bench/raft-ann-bench/plot/__main__.py +++ b/python/raft-ann-bench/raft-ann-bench/plot/__main__.py @@ -13,22 +13,21 @@ # See the License for the specific language governing permissions and # limitations under the License. -# This script is inspired by +# This script is inspired by # 1: https://github.com/erikbern/ann-benchmarks/blob/main/plot.py -# 2: https://github.com/erikbern/ann-benchmarks/blob/main/ann_benchmarks/plotting/utils.py -# 3: https://github.com/erikbern/ann-benchmarks/blob/main/ann_benchmarks/plotting/metrics.py +# 2: https://github.com/erikbern/ann-benchmarks/blob/main/ann_benchmarks/plotting/utils.py # noqa: E501 +# 3: https://github.com/erikbern/ann-benchmarks/blob/main/ann_benchmarks/plotting/metrics.py # noqa: E501 # Licence: https://github.com/erikbern/ann-benchmarks/blob/main/LICENSE -import matplotlib as mpl - -mpl.use("Agg") # noqa import argparse import itertools -import matplotlib.pyplot as plt -import numpy as np import os +import matplotlib as mpl +import matplotlib.pyplot as plt +import numpy as np +mpl.use("Agg") metrics = { "k-nn": { @@ -39,7 +38,7 @@ "qps": { "description": "Queries per second (1/s)", "worst": float("-inf"), - } + }, } @@ -51,17 +50,36 @@ def euclidean(a, b): return sum((x - y) ** 2 for x, y in zip(a, b)) while len(colors) < n: - new_color = max(itertools.product(vs, vs, vs), key=lambda a: min(euclidean(a, b) for b in colors)) + new_color = max( + itertools.product(vs, vs, vs), + key=lambda a: min(euclidean(a, b) for b in colors), + ) colors.append(new_color + (1.0,)) return colors def create_linestyles(unique_algorithms): - colors = dict(zip(unique_algorithms, generate_n_colors(len(unique_algorithms)))) - linestyles = dict((algo, ["--", "-.", "-", ":"][i % 4]) for i, algo in enumerate(unique_algorithms)) - markerstyles = dict((algo, ["+", "<", "o", "*", "x"][i % 5]) for i, algo in enumerate(unique_algorithms)) - faded = dict((algo, (r, g, b, 0.3)) for algo, (r, g, b, a) in colors.items()) - return dict((algo, (colors[algo], faded[algo], linestyles[algo], markerstyles[algo])) for algo in unique_algorithms) + colors = dict( + zip(unique_algorithms, generate_n_colors(len(unique_algorithms))) + ) + linestyles = dict( + (algo, ["--", "-.", "-", ":"][i % 4]) + for i, algo in enumerate(unique_algorithms) + ) + markerstyles = dict( + (algo, ["+", "<", "o", "*", "x"][i 
% 5]) + for i, algo in enumerate(unique_algorithms) + ) + faded = dict( + (algo, (r, g, b, 0.3)) for algo, (r, g, b, a) in colors.items() + ) + return dict( + ( + algo, + (colors[algo], faded[algo], linestyles[algo], markerstyles[algo]), + ) + for algo in unique_algorithms + ) def get_up_down(metric): @@ -77,7 +95,10 @@ def get_left_right(metric): def get_plot_label(xm, ym): - template = "%(xlabel)s-%(ylabel)s tradeoff - %(updown)s and" " to the %(leftright)s is better" + template = ( + "%(xlabel)s-%(ylabel)s tradeoff - %(updown)s and" + " to the %(leftright)s is better" + ) return template % { "xlabel": xm["description"], "ylabel": ym["description"], @@ -96,7 +117,9 @@ def create_pointset(data, xn, yn): # Generate Pareto frontier xs, ys, ls = [], [], [] last_x = xm["worst"] - comparator = (lambda xv, lx: xv > lx) if last_x < 0 else (lambda xv, lx: xv < lx) + comparator = ( + (lambda xv, lx: xv > lx) if last_x < 0 else (lambda xv, lx: xv < lx) + ) for algo_name, xv, yv in data: if not xv or not yv: continue @@ -133,12 +156,28 @@ def mean_y(algo): max_x = max([max_x] + [x for x in xs if x < 1]) color, faded, linestyle, marker = linestyles[algo] (handle,) = plt.plot( - xs, ys, "-", label=algo, color=color, ms=7, mew=3, lw=3, marker=marker + xs, + ys, + "-", + label=algo, + color=color, + ms=7, + mew=3, + lw=3, + marker=marker, ) handles.append(handle) if raw: (handle2,) = plt.plot( - axs, ays, "-", label=algo, color=faded, ms=5, mew=2, lw=2, marker=marker + axs, + ays, + "-", + label=algo, + color=faded, + ms=5, + mew=2, + lw=2, + marker=marker, ) labels.append(algo) @@ -172,7 +211,13 @@ def inv_fun(x): ax.set_title(get_plot_label(xm, ym)) plt.gca().get_position() # plt.gca().set_position([box.x0, box.y0, box.width * 0.8, box.height]) - ax.legend(handles, labels, loc="center left", bbox_to_anchor=(1, 0.5), prop={"size": 9}) + ax.legend( + handles, + labels, + loc="center left", + bbox_to_anchor=(1, 0.5), + prop={"size": 9}, + ) plt.grid(visible=True, which="major", color="0.65", linestyle="-") plt.setp(ax.get_xminorticklabels(), visible=True) @@ -194,28 +239,35 @@ def inv_fun(x): def load_all_results(result_filepath): results = dict() - with open(result_filepath, 'r') as f: + with open(result_filepath, "r") as f: for line in f.readlines()[1:]: - split_lines = line.split(',') - algo_name = split_lines[0].split('.')[0] + split_lines = line.split(",") + algo_name = split_lines[0].split(".")[0] if algo_name not in results: results[algo_name] = [] - results[algo_name].append([algo_name, float(split_lines[1]), - float(split_lines[2])]) + results[algo_name].append( + [algo_name, float(split_lines[1]), float(split_lines[2])] + ) return results def main(): parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument("--result-csv", help="Path to CSV Results", required=True) - parser.add_argument("--output", help="Path to the PNG output file", - default=f"{os.getcwd()}/out.png") + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument( + "--result-csv", help="Path to CSV Results", required=True + ) + parser.add_argument( + "--output", + help="Path to the PNG output file", + default=f"{os.getcwd()}/out.png", + ) parser.add_argument( "--x-scale", help="Scale to use when drawing the X-axis. 
\ - Typically linear, logit or a2", - default="linear" + Typically linear, logit or a2", + default="linear", ) parser.add_argument( "--y-scale", @@ -224,7 +276,9 @@ def main(): default="linear", ) parser.add_argument( - "--raw", help="Show raw results (not just Pareto frontier) in faded colours", action="store_true" + "--raw", + help="Show raw results (not just Pareto frontier) in faded colours", + action="store_true", ) args = parser.parse_args() @@ -233,7 +287,9 @@ def main(): results = load_all_results(args.result_csv) linestyles = create_linestyles(sorted(results.keys())) - create_plot(results, args.raw, args.x_scale, args.y_scale, args.output, linestyles) + create_plot( + results, args.raw, args.x_scale, args.y_scale, args.output, linestyles + ) if __name__ == "__main__": diff --git a/python/raft-ann-bench/raft-ann-bench/run/__main__.py b/python/raft-ann-bench/raft-ann-bench/run/__main__.py index eded60a3b0..917e2e76da 100644 --- a/python/raft-ann-bench/raft-ann-bench/run/__main__.py +++ b/python/raft-ann-bench/raft-ann-bench/run/__main__.py @@ -17,6 +17,7 @@ import json import os import subprocess + import yaml @@ -26,10 +27,13 @@ def positive_int(input_str: str) -> int: if i < 1: raise ValueError except ValueError: - raise argparse.ArgumentTypeError(f"{input_str} is not a positive integer") + raise argparse.ArgumentTypeError( + f"{input_str} is not a positive integer" + ) return i + def validate_algorithm(algos_conf, algo): algos_conf_keys = set(algos_conf.keys()) return algo in algos_conf_keys and not algos_conf[algo]["disabled"] @@ -37,9 +41,12 @@ def validate_algorithm(algos_conf, algo): def find_executable(algos_conf, algo): executable = algos_conf[algo]["executable"] - conda_path = os.path.join(os.getenv("CONDA_PREFIX"), "bin", "ann", - executable) - build_path = os.path.join(os.getenv("RAFT_HOME"), "cpp", "build", executable) + conda_path = os.path.join( + os.getenv("CONDA_PREFIX"), "bin", "ann", executable + ) + build_path = os.path.join( + os.getenv("RAFT_HOME"), "cpp", "build", executable + ) if os.path.exists(conda_path): print("Using RAFT bench found in conda environment: ") return (executable, conda_path) @@ -50,8 +57,17 @@ def find_executable(algos_conf, algo): raise FileNotFoundError(executable) -def run_build_and_search(conf_filename, conf_file, executables_to_run, - force, conf_filedir, build, search, k, batch_size): +def run_build_and_search( + conf_filename, + conf_file, + executables_to_run, + force, + conf_filedir, + build, + search, + k, + batch_size, +): for executable, ann_executable_path in executables_to_run.keys(): # Need to write temporary configuration temp_conf_filename = f"temporary_executable_{conf_filename}" @@ -60,32 +76,43 @@ def run_build_and_search(conf_filename, conf_file, executables_to_run, temp_conf = dict() temp_conf["dataset"] = conf_file["dataset"] temp_conf["search_basic_param"] = conf_file["search_basic_param"] - temp_conf["index"] = executables_to_run[(executable, - ann_executable_path)]["index"] + temp_conf["index"] = executables_to_run[ + (executable, ann_executable_path) + ]["index"] json.dump(temp_conf, f) if build: if force: - p = subprocess.Popen([ann_executable_path, "--build", "--overwrite", - temp_conf_filepath]) + p = subprocess.Popen( + [ + ann_executable_path, + "--build", + "--overwrite", + temp_conf_filepath, + ] + ) p.wait() else: - p = subprocess.Popen([ann_executable_path, "--build", - temp_conf_filepath]) + p = subprocess.Popen( + [ann_executable_path, "--build", temp_conf_filepath] + ) p.wait() if search: 
legacy_result_folder = "result/" + temp_conf["dataset"]["name"] os.makedirs(legacy_result_folder, exist_ok=True) - p = subprocess.Popen([ - ann_executable_path, - "--search", - "--benchmark_counters_tabular", - "--benchmark_out_format=json", - "--override_kv=k:%s" % k, - "--override_kv=n_queries:%s" % batch_size, - f"--benchmark_out={legacy_result_folder}/{executable}.json", - temp_conf_filepath]) + p = subprocess.Popen( + [ + ann_executable_path, + "--search", + "--benchmark_counters_tabular", + "--benchmark_out_format=json", + "--override_kv=k:%s" % k, + "--override_kv=n_queries:%s" % batch_size, + f"--benchmark_out={legacy_result_folder}/{executable}.json", # noqa: E501 + temp_conf_filepath, + ] + ) p.wait() os.remove(temp_conf_filepath) @@ -96,7 +123,8 @@ def main(): call_path = os.getcwd() # Read list of allowed algorithms try: - import pylibraft + import pylibraft # noqa: F401 + algo_file = "algos.yaml" except ImportError: algo_file = "algos_cpu.yaml" @@ -109,13 +137,22 @@ def main(): default_dataset_path = os.path.join(call_path, "datasets/") parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter) + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) parser.add_argument( - "-k", "--count", default=10, type=positive_int, help="the number of nearest neighbors to search for" + "-k", + "--count", + default=10, + type=positive_int, + help="the number of nearest neighbors to search for", ) parser.add_argument( - "-bs", "--batch-size", default=10000, type=positive_int, help="number of query vectors to use in each query trial" + "-bs", + "--batch-size", + default=10000, + type=positive_int, + help="number of query vectors to use in each query trial", ) parser.add_argument( "--configuration", @@ -124,33 +161,34 @@ def main(): parser.add_argument( "--dataset", help="dataset whose configuration file will be used", - default="glove-100-inner" + default="glove-100-inner", ) parser.add_argument( "--dataset-path", help="path to dataset folder", - default=default_dataset_path + default=default_dataset_path, ) + parser.add_argument("--build", action="store_true") + parser.add_argument("--search", action="store_true") parser.add_argument( - "--build", - action="store_true" + "--algorithms", + help="run only comma separated list of named \ + algorithms", + default=None, ) parser.add_argument( - "--search", - action="store_true" - ) - parser.add_argument("--algorithms", - help="run only comma separated list of named \ - algorithms", - default=None) - parser.add_argument("--indices", - help="run only comma separated list of named indices. \ + "--indices", + help="run only comma separated list of named indices. 
\ parameter `algorithms` is ignored", - default=None) - parser.add_argument("-f", "--force", - help="re-run algorithms even if their results \ + default=None, + ) + parser.add_argument( + "-f", + "--force", + help="re-run algorithms even if their results \ already exist", - action="store_true") + action="store_true", + ) args = parser.parse_args() @@ -170,11 +208,15 @@ def main(): if args.configuration: conf_filepath = args.configuration else: - conf_filepath = os.path.join(scripts_path, "conf", f"{args.dataset}.json") + conf_filepath = os.path.join( + scripts_path, "conf", f"{args.dataset}.json" + ) conf_filename = conf_filepath.split("/")[-1] conf_filedir = "/".join(conf_filepath.split("/")[:-1]) dataset_name = conf_filename.replace(".json", "") - dataset_path = os.path.realpath(os.path.join(args.dataset_path, dataset_name)) + dataset_path = os.path.realpath( + os.path.join(args.dataset_path, dataset_name) + ) if not os.path.exists(conf_filepath): raise FileNotFoundError(conf_filename) @@ -183,8 +225,12 @@ def main(): # Replace base, query to dataset-path conf_file["dataset"]["base_file"] = os.path.join(dataset_path, "base.fbin") - conf_file["dataset"]["query_file"] = os.path.join(dataset_path, "query.fbin") - conf_file["dataset"]["groundtruth_neighbors_file"] = os.path.join(dataset_path, "groundtruth.neighbors.ibin") + conf_file["dataset"]["query_file"] = os.path.join( + dataset_path, "query.fbin" + ) + conf_file["dataset"]["groundtruth_neighbors_file"] = os.path.join( + dataset_path, "groundtruth.neighbors.ibin" + ) # Ensure base and query files exist for dataset if not os.path.exists(conf_file["dataset"]["base_file"]): raise FileNotFoundError(conf_file["dataset"]["base_file"]) @@ -199,8 +245,9 @@ def main(): # and enabled for index in conf_file["index"]: curr_algo = index["algo"] - if index["name"] in indices and \ - validate_algorithm(algos_conf, curr_algo): + if index["name"] in indices and validate_algorithm( + algos_conf, curr_algo + ): executable_path = find_executable(algos_conf, curr_algo) if executable_path not in executables_to_run: executables_to_run[executable_path] = {"index": []} @@ -213,8 +260,9 @@ def main(): # and are enabled in algos.yaml for index in conf_file["index"]: curr_algo = index["algo"] - if curr_algo in algorithms and \ - validate_algorithm(algos_conf, curr_algo): + if curr_algo in algorithms and validate_algorithm( + algos_conf, curr_algo + ): executable_path = find_executable(algos_conf, curr_algo) if executable_path not in executables_to_run: executables_to_run[executable_path] = {"index": []} @@ -232,14 +280,26 @@ def main(): # Replace build, search to dataset path for executable_path in executables_to_run: - for pos, index in enumerate(executables_to_run[executable_path]["index"]): + for pos, index in enumerate( + executables_to_run[executable_path]["index"] + ): index["file"] = os.path.join(dataset_path, "index", index["name"]) - index["search_result_file"] = \ - os.path.join(dataset_path, "result", index["name"]) + index["search_result_file"] = os.path.join( + dataset_path, "result", index["name"] + ) executables_to_run[executable_path]["index"][pos] = index - run_build_and_search(conf_filename, conf_file, executables_to_run, - args.force, conf_filedir, build, search, k, batch_size) + run_build_and_search( + conf_filename, + conf_file, + executables_to_run, + args.force, + conf_filedir, + build, + search, + k, + batch_size, + ) if __name__ == "__main__": diff --git a/python/raft-ann-bench/raft-ann-bench/split_groundtruth/__main__.py 
b/python/raft-ann-bench/raft-ann-bench/split_groundtruth/__main__.py index ee5f61ec9e..161617f85c 100644 --- a/python/raft-ann-bench/raft-ann-bench/split_groundtruth/__main__.py +++ b/python/raft-ann-bench/raft-ann-bench/split_groundtruth/__main__.py @@ -23,18 +23,22 @@ def split_groundtruth(groundtruth_filepath): pwd = os.getcwd() os.chdir("/".join(groundtruth_filepath.split("/")[:-1])) groundtruth_filename = groundtruth_filepath.split("/")[-1] - p = subprocess.Popen([ann_bench_scripts_path, groundtruth_filename, - "groundtruth"]) + p = subprocess.Popen( + [ann_bench_scripts_path, groundtruth_filename, "groundtruth"] + ) p.wait() os.chdir(pwd) def main(): parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument("--groundtruth", - help="Path to billion-scale dataset groundtruth file", - required=True) + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument( + "--groundtruth", + help="Path to billion-scale dataset groundtruth file", + required=True, + ) args = parser.parse_args() split_groundtruth(args.groundtruth) From 0cf1c6fbe90aadabb6a7ac1b2ffc7004c0eceea1 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Fri, 25 Aug 2023 18:16:39 -0400 Subject: [PATCH 25/70] CLeaning up a couple configs --- bench/ann/conf/deep-100M.json | 17 + bench/ann/conf/deep-image-96-angular.json | 494 ++++------------------ 2 files changed, 92 insertions(+), 419 deletions(-) diff --git a/bench/ann/conf/deep-100M.json b/bench/ann/conf/deep-100M.json index 1b82f424cc..c587535903 100644 --- a/bench/ann/conf/deep-100M.json +++ b/bench/ann/conf/deep-100M.json @@ -182,6 +182,23 @@ {"nprobe":1000} ] }, + + { + "name": "raft_ivf_flat.nlist50K", + "algo": "raft_ivf_flat", + "build_param": {"nlist": 50000, "niter": 25, "ratio": 5}, + "file": "deep-100M/raft_ivf_flat/nlist50K", + "search_params": [ + {"max_batch":10000, "max_k":10, "nprobe":20}, + {"max_batch":10000, "max_k":10, "nprobe":30}, + {"max_batch":10000, "max_k":10, "nprobe":40}, + {"max_batch":10000, "max_k":10, "nprobe":50}, + {"max_batch":10000, "max_k":10, "nprobe":100}, + {"max_batch":10000, "max_k":10, "nprobe":200}, + {"max_batch":10000, "max_k":10, "nprobe":500}, + {"max_batch":10000, "max_k":10, "nprobe":1000} + ] + }, { "name": "raft_ivf_flat.nlist100K", "algo": "raft_ivf_flat", diff --git a/bench/ann/conf/deep-image-96-angular.json b/bench/ann/conf/deep-image-96-angular.json index 4467e09dab..ceeff9dd83 100644 --- a/bench/ann/conf/deep-image-96-angular.json +++ b/bench/ann/conf/deep-image-96-angular.json @@ -293,24 +293,12 @@ }, "file": "index/deep-image-96-angular/faiss_ivf_pq/M64-nlist1024", "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} ], "search_result_file": "result/deep-image-96-angular/faiss_ivf_pq/M64-nlist1024" }, @@ -325,24 +313,12 @@ }, "file": "index/deep-image-96-angular/faiss_ivf_pq/M64-nlist1024.noprecomp", "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} ], "search_result_file": "result/deep-image-96-angular/faiss_ivf_pq/M64-nlist1024" }, @@ -727,50 +703,16 @@ "name": 
"raft_ivf_pq.dimpq128-cluster1024", "algo": "raft_ivf_pq", "dataset_memtype": "device", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 + "build_param": {"nlist": 1024, "pq_dim": 128, "ratio": 1, "niter": 25 }, "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024", "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - } + {"nprobe": 10, "internalDistanceDtype": "half", "smemLutDtype": "half"}, + {"nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half"}, + {"nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half"}, + {"nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half"}, + {"nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half"}, + {"nprobe": 1024, "internalDistanceDtype": "half", "smemLutDtype": "half"} ], "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024" }, @@ -786,60 +728,14 @@ }, "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-float-float", "search_params": [ - { - "k": 10, - "numProbes": 1, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 5, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } + {"nprobe": 1, "internalDistanceDtype": "float", "smemLutDtype": "float"}, + {"nprobe": 5, "internalDistanceDtype": "float", "smemLutDtype": "float"}, + {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "float"}, + {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float"}, + {"nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float"}, + {"nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float"}, + {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float"}, + {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "float"} ], "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-float-float" }, @@ -855,42 +751,12 @@ }, "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-float-half", "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": 
"half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - } + {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "half"}, + {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "half"}, + {"nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "half"}, + {"nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "half"}, + {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "half"}, + {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "half"} ], "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-float-half" }, @@ -906,42 +772,12 @@ }, "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8", "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } + {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, + {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, + {"nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, + {"nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, + {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, + {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8"} ], "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8" }, @@ -957,42 +793,12 @@ }, "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8", "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } + {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, + {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, + {"nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, + {"nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, + {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, + {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8"} ], "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8" }, @@ -1008,42 
+814,12 @@ }, "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq64-cluster1024-float-half", "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - } + {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "half"}, + {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "half"}, + {"nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "half"}, + {"nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "half"}, + {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "half"}, + {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "half"} ], "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq64-cluster1024-float-half" }, @@ -1059,42 +835,12 @@ }, "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8", "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } + {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, + {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, + {"nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, + {"nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, + {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, + {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8"} ], "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8" }, @@ -1110,42 +856,12 @@ }, "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8", "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } + {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, + {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, + {"nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, + {"nprobe": 200, "internalDistanceDtype": 
"float", "smemLutDtype": "fp8"}, + {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, + {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8"} ], "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8" }, @@ -1161,42 +877,12 @@ }, "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-half-float", "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - } + {"nprobe": 10, "internalDistanceDtype": "half", "smemLutDtype": "float"}, + {"nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "float"}, + {"nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "float"}, + {"nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "float"}, + {"nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "float"}, + {"nprobe": 1024, "internalDistanceDtype": "half", "smemLutDtype": "float"} ], "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-half-float" }, @@ -1212,42 +898,12 @@ }, "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq512-cluster1024-float-float", "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } + {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "float"}, + {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float"}, + {"nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float"}, + {"nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float"}, + {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float"}, + {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "float"} ], "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq512-cluster1024-float-float" }, From f5bf15a2beef74af7402e6f0cf8f0969ed087d03 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Fri, 25 Aug 2023 17:41:24 -0500 Subject: [PATCH 26/70] FIX typo in cmake conditional --- cpp/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 32b99fec4d..ecb74ad306 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -22,7 +22,7 @@ include(rapids-find) option(CPU_ONLY "Build CPU only components. 
Apples to RAFT ANN benchmarks currently" OFF) -if(CPU_ONLY) +if(NOT CPU_ONLY) include(rapids-cuda) rapids_cuda_init_architectures(RAFT) endif() From 617c60f8609795b68ecd727899ba10ae8d83ec58 Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 25 Aug 2023 16:48:33 -0700 Subject: [PATCH 27/70] add tuning guide for cagra, modify build param --- bench/ann/conf/bigann-100M.json | 4 ++-- bench/ann/conf/deep-100M.json | 8 ++++---- bench/ann/conf/deep-image-96-angular.json | 4 ++-- bench/ann/conf/fashion-mnist-784-euclidean.json | 4 ++-- bench/ann/conf/gist-960-euclidean.json | 4 ++-- bench/ann/conf/glove-100-angular.json | 4 ++-- bench/ann/conf/glove-50-angular.json | 4 ++-- bench/ann/conf/lastfm-65-angular.json | 4 ++-- bench/ann/conf/mnist-784-euclidean.json | 4 ++-- bench/ann/conf/nytimes-256-angular.json | 4 ++-- bench/ann/conf/sift-128-euclidean.json | 4 ++-- cpp/bench/ann/src/raft/raft_benchmark.cu | 4 ++-- docs/source/ann_benchmarks_param_tuning.md | 10 ++++++++++ docs/source/raft_ann_benchmarks.md | 3 +-- 14 files changed, 37 insertions(+), 28 deletions(-) diff --git a/bench/ann/conf/bigann-100M.json b/bench/ann/conf/bigann-100M.json index c691c68299..e7d8661125 100644 --- a/bench/ann/conf/bigann-100M.json +++ b/bench/ann/conf/bigann-100M.json @@ -172,7 +172,7 @@ "name": "raft_cagra.dim32", "algo": "raft_cagra", "dataset_memtype": "host", - "build_param": {"index_dim": 32}, + "build_param": {"graph_degree": 32}, "file": "bigann-100M/raft_cagra/dim32", "search_params": [ {"itopk": 32}, @@ -184,7 +184,7 @@ "name": "raft_cagra.dim64", "algo": "raft_cagra", "dataset_memtype":"host", - "build_param": {"index_dim": 64}, + "build_param": {"graph_degree": 64}, "file": "bigann-100M/raft_cagra/dim64", "search_params": [ {"itopk": 32}, diff --git a/bench/ann/conf/deep-100M.json b/bench/ann/conf/deep-100M.json index c587535903..433cb80ebf 100644 --- a/bench/ann/conf/deep-100M.json +++ b/bench/ann/conf/deep-100M.json @@ -220,7 +220,7 @@ "name": "raft_cagra.dim32", "algo": "raft_cagra", "dataset_memtype":"host", - "build_param": {"index_dim": 32, "intermediate_graph_degree": 48}, + "build_param": {"graph_degree": 32, "intermediate_graph_degree": 48}, "file": "deep-100M/raft_cagra/dim32", "search_params": [ {"itopk": 32, "search_width": 1, "max_iterations": 0, "algo": "single_cta"}, @@ -241,7 +241,7 @@ "name": "raft_cagra.dim32.multi_cta", "algo": "raft_cagra", "dataset_memtype":"host", - "build_param": {"index_dim": 32, "intermediate_graph_degree": 48}, + "build_param": {"graph_degree": 32, "intermediate_graph_degree": 48}, "file": "deep-100M/raft_cagra/dim32", "search_params": [ {"itopk": 32, "search_width": 1, "max_iterations": 0, "algo": "multi_cta"}, @@ -262,7 +262,7 @@ "name": "raft_cagra.dim32.multi_kernel", "algo": "raft_cagra", "dataset_memtype":"host", - "build_param": {"index_dim": 32, "intermediate_graph_degree": 48}, + "build_param": {"graph_degree": 32, "intermediate_graph_degree": 48}, "file": "deep-100M/raft_cagra/dim32", "search_params": [ {"itopk": 32, "search_width": 1, "max_iterations": 0, "algo": "multi_kernel"}, @@ -283,7 +283,7 @@ "name": "raft_cagra.dim64", "algo": "raft_cagra", "dataset_memtype":"host", - "build_param": {"index_dim": 64}, + "build_param": {"graph_degree": 64}, "file": "deep-100M/raft_cagra/dim64", "search_params": [ {"itopk": 32, "search_width": 1, "max_iterations": 0}, diff --git a/bench/ann/conf/deep-image-96-angular.json b/bench/ann/conf/deep-image-96-angular.json index ceeff9dd83..72795c449d 100644 --- a/bench/ann/conf/deep-image-96-angular.json +++ 
b/bench/ann/conf/deep-image-96-angular.json @@ -992,7 +992,7 @@ "algo" : "raft_cagra", "dataset_memtype": "device", "build_param": { - "index_dim" : 32 + "graph_degree" : 32 }, "file" : "index/deep-image-96-angular/raft_cagra/dim32", "search_params" : [ @@ -1008,7 +1008,7 @@ "algo" : "raft_cagra", "dataset_memtype": "device", "build_param": { - "index_dim" : 64 + "graph_degree" : 64 }, "file" : "index/deep-image-96-angular/raft_cagra/dim64", "search_params" : [ diff --git a/bench/ann/conf/fashion-mnist-784-euclidean.json b/bench/ann/conf/fashion-mnist-784-euclidean.json index 5a0713ca0b..1a24eed018 100644 --- a/bench/ann/conf/fashion-mnist-784-euclidean.json +++ b/bench/ann/conf/fashion-mnist-784-euclidean.json @@ -1336,7 +1336,7 @@ "algo" : "raft_cagra", "dataset_memtype": "device", "build_param": { - "index_dim" : 32 + "graph_degree" : 32 }, "file" : "index/fashion-mnist-784-euclidean/raft_cagra/dim32", "search_params" : [ @@ -1352,7 +1352,7 @@ "algo" : "raft_cagra", "dataset_memtype": "device", "build_param": { - "index_dim" : 64 + "graph_degree" : 64 }, "file" : "index/fashion-mnist-784-euclidean/raft_cagra/dim64", "search_params" : [ diff --git a/bench/ann/conf/gist-960-euclidean.json b/bench/ann/conf/gist-960-euclidean.json index d03df0f486..fed7750172 100644 --- a/bench/ann/conf/gist-960-euclidean.json +++ b/bench/ann/conf/gist-960-euclidean.json @@ -1322,7 +1322,7 @@ "name" : "raft_cagra.dim32", "algo" : "raft_cagra", "build_param": { - "index_dim" : 32 + "graph_degree" : 32 }, "file" : "index/gist-960-euclidean/raft_cagra/dim32", "search_params" : [ @@ -1337,7 +1337,7 @@ "name" : "raft_cagra.dim64", "algo" : "raft_cagra", "build_param": { - "index_dim" : 64 + "graph_degree" : 64 }, "file" : "index/gist-960-euclidean/raft_cagra/dim64", "search_params" : [ diff --git a/bench/ann/conf/glove-100-angular.json b/bench/ann/conf/glove-100-angular.json index 1d3dc09988..8c2f8ee617 100644 --- a/bench/ann/conf/glove-100-angular.json +++ b/bench/ann/conf/glove-100-angular.json @@ -1322,7 +1322,7 @@ "name" : "raft_cagra.dim32", "algo" : "raft_cagra", "build_param": { - "index_dim" : 32 + "graph_degree" : 32 }, "file" : "index/glove-100-angular/raft_cagra/dim32", "search_params" : [ @@ -1337,7 +1337,7 @@ "name" : "raft_cagra.dim64", "algo" : "raft_cagra", "build_param": { - "index_dim" : 64 + "graph_degree" : 64 }, "file" : "index/glove-100-angular/raft_cagra/dim64", "search_params" : [ diff --git a/bench/ann/conf/glove-50-angular.json b/bench/ann/conf/glove-50-angular.json index 3e78c11814..a73ed1ec07 100644 --- a/bench/ann/conf/glove-50-angular.json +++ b/bench/ann/conf/glove-50-angular.json @@ -1322,7 +1322,7 @@ "name" : "raft_cagra.dim32", "algo" : "raft_cagra", "build_param": { - "index_dim" : 32 + "graph_degree" : 32 }, "file" : "index/glove-50-angular/raft_cagra/dim32", "search_params" : [ @@ -1337,7 +1337,7 @@ "name" : "raft_cagra.dim64", "algo" : "raft_cagra", "build_param": { - "index_dim" : 64 + "graph_degree" : 64 }, "file" : "index/glove-50-angular/raft_cagra/dim64", "search_params" : [ diff --git a/bench/ann/conf/lastfm-65-angular.json b/bench/ann/conf/lastfm-65-angular.json index 62f8878bd6..b07e682268 100644 --- a/bench/ann/conf/lastfm-65-angular.json +++ b/bench/ann/conf/lastfm-65-angular.json @@ -1322,7 +1322,7 @@ "name" : "raft_cagra.dim32", "algo" : "raft_cagra", "build_param": { - "index_dim" : 32 + "graph_degree" : 32 }, "file" : "index/lastfm-65-angular/raft_cagra/dim32", "search_params" : [ @@ -1337,7 +1337,7 @@ "name" : "raft_cagra.dim64", "algo" : "raft_cagra", 
"build_param": { - "index_dim" : 64 + "graph_degree" : 64 }, "file" : "index/lastfm-65-angular/raft_cagra/dim64", "search_params" : [ diff --git a/bench/ann/conf/mnist-784-euclidean.json b/bench/ann/conf/mnist-784-euclidean.json index 30e39a841c..362cc21083 100644 --- a/bench/ann/conf/mnist-784-euclidean.json +++ b/bench/ann/conf/mnist-784-euclidean.json @@ -1322,7 +1322,7 @@ "name" : "raft_cagra.dim32", "algo" : "raft_cagra", "build_param": { - "index_dim" : 32 + "graph_degree" : 32 }, "file" : "index/mnist-784-euclidean/raft_cagra/dim32", "search_params" : [ @@ -1337,7 +1337,7 @@ "name" : "raft_cagra.dim64", "algo" : "raft_cagra", "build_param": { - "index_dim" : 64 + "graph_degree" : 64 }, "file" : "index/mnist-784-euclidean/raft_cagra/dim64", "search_params" : [ diff --git a/bench/ann/conf/nytimes-256-angular.json b/bench/ann/conf/nytimes-256-angular.json index 5d4e19d46b..4c389bb6b7 100644 --- a/bench/ann/conf/nytimes-256-angular.json +++ b/bench/ann/conf/nytimes-256-angular.json @@ -1322,7 +1322,7 @@ "name" : "raft_cagra.dim32", "algo" : "raft_cagra", "build_param": { - "index_dim" : 32 + "graph_degree" : 32 }, "file" : "index/nytimes-256-angular/raft_cagra/dim32", "search_params" : [ @@ -1337,7 +1337,7 @@ "name" : "raft_cagra.dim64", "algo" : "raft_cagra", "build_param": { - "index_dim" : 64 + "graph_degree" : 64 }, "file" : "index/nytimes-256-angular/raft_cagra/dim64", "search_params" : [ diff --git a/bench/ann/conf/sift-128-euclidean.json b/bench/ann/conf/sift-128-euclidean.json index 116ea8d557..439c1a10c6 100644 --- a/bench/ann/conf/sift-128-euclidean.json +++ b/bench/ann/conf/sift-128-euclidean.json @@ -475,7 +475,7 @@ { "name": "raft_cagra.dim32", "algo": "raft_cagra", - "build_param": {"index_dim": 32}, + "build_param": {"graph_degree": 32}, "file": "sift-128-euclidean/raft_cagra/dim32", "search_params": [ {"itopk": 32}, @@ -486,7 +486,7 @@ { "name": "raft_cagra.dim64", "algo": "raft_cagra", - "build_param": {"index_dim": 64}, + "build_param": {"graph_degree": 64}, "file": "sift-128-euclidean/raft_cagra/dim64", "search_params": [ {"itopk": 32}, diff --git a/cpp/bench/ann/src/raft/raft_benchmark.cu b/cpp/bench/ann/src/raft/raft_benchmark.cu index 823fa3f2f3..aa25d1532f 100644 --- a/cpp/bench/ann/src/raft/raft_benchmark.cu +++ b/cpp/bench/ann/src/raft/raft_benchmark.cu @@ -132,8 +132,8 @@ template void parse_build_param(const nlohmann::json& conf, typename raft::bench::ann::RaftCagra::BuildParam& param) { - if (conf.contains("index_dim")) { - param.graph_degree = conf.at("index_dim"); + if (conf.contains("graph_degree")) { + param.graph_degree = conf.at("graph_degree"); param.intermediate_graph_degree = param.graph_degree * 2; } if (conf.contains("intermediate_graph_degree")) { diff --git a/docs/source/ann_benchmarks_param_tuning.md b/docs/source/ann_benchmarks_param_tuning.md index f5801322d3..5d59fac75e 100644 --- a/docs/source/ann_benchmarks_param_tuning.md +++ b/docs/source/ann_benchmarks_param_tuning.md @@ -36,6 +36,16 @@ IVF-pq is an inverted-file index, which partitions the vectors into a series of ### `raft_cagra` +CAGRA uses a graph-based index, which creates an intermediate, approximate kNN graph using IVF-PQ and then further refining and optimizing to create a final kNN graph. This kNN graph is used by CAGRA as an index for search. 
+ +| Parameter | Type | Required | Data Type | Default | Description | +|-----------|----------------|----------|---------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `graph_degree` | `build_param` | N | Positive Integer >0 | 64 | Degree of the final kNN graph index. | +| `intermediate_graph_degree` | `build_param` | N | Positive Integer >0 | 128 | Degree of the intermediate kNN graph. | +| `itopk` | `search_param` | N | Positive Integer >0 | 64 | Number of intermediate search results retained during the search. Higher values improve search accuracy at the cost of speed. | +| `search_width` | `search_param` | N | Positive Integer >0 | 1 | Number of graph nodes to select as the starting point for the search in each iteration. | +| `max_iterations` | `search_param` | N | Integer >=0 | 0 | Upper limit of search iterations. Auto select when 0. | +| `algo` | `search_param` | N | string | "auto" | Algorithm to use for search. Possible values: {"auto", "single_cta", "multi_cta", "multi_kernel"} | ## FAISS Indexes diff --git a/docs/source/raft_ann_benchmarks.md b/docs/source/raft_ann_benchmarks.md index e242f8b655..67e04ff518 100644 --- a/docs/source/raft_ann_benchmarks.md +++ b/docs/source/raft_ann_benchmarks.md @@ -118,11 +118,10 @@ options: --dataset-path DATASET_PATH path to download dataset (default: ${RAFT_HOME}/bench/ann/data) --normalize normalize cosine distance to inner product (default: False) - +``` When option `normalize` is provided to the script, any dataset that has cosine distances will be normalized to inner product. So, for example, the dataset `glove-100-angular` will be written at location `${RAFT_HOME}/bench/ann/data/glove-100-inner/`. -``` #### Step 2: Build and Search Index The script `bench/ann/run.py` will build and search indices for a given dataset and its From 74c9a1bc4704f25dfcc0a2c8901b813c75da7883 Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 25 Aug 2023 18:44:04 -0700 Subject: [PATCH 28/70] remove data_export, use gbench csvs to plot --- bench/ann/data_export.py | 80 ------------------------ bench/ann/plot.py | 44 ++++++++++------ bench/ann/run.py | 3 +- docs/source/raft_ann_benchmarks.md | 68 +++++++++---------------- 4 files changed, 53 insertions(+), 142 deletions(-) delete mode 100644 bench/ann/data_export.py diff --git a/bench/ann/data_export.py b/bench/ann/data_export.py deleted file mode 100644 index 87ca330ed9..0000000000 --- a/bench/ann/data_export.py +++ /dev/null @@ -1,80 +0,0 @@ -# -# Copyright (c) 2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.
- -import argparse -import os -import subprocess -import json - -from pathlib import Path - -def parse_filepaths(fs): - for p in fs: - if p.endswith(".json") and os.path.exists(p): - yield p - else: - for f in Path(p).rglob('*.json'): - yield f.as_posix() - -def export_results(output_filepath, recompute, groundtruth_filepath, - result_filepath): - print(f"Writing output file to: {output_filepath}") - - parsed_filepaths = parse_filepaths(result_filepaths) - - with open(output_filepath, 'w') as out: - out.write("Algo,Recall,QPS\n") - - for fp in parsed_filepaths: - with open(fp, 'r') as f: - data = json.load(f) - for benchmark_case in data["benchmarks"]: - algo = benchmark_case["name"] - recall = benchmark_case["Recall"] - qps = benchmark_case["items_per_second"] - out.write(f"{algo},{recall},{qps}\n") - - -def main(): - parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument("--output", help="Path to the CSV output file", - required=True) - parser.add_argument("--recompute", action="store_true", - help="Recompute metrics") - parser.add_argument("--dataset", - help="Name of the dataset to export results for", - default="glove-100-inner") - parser.add_argument( - "--dataset-path", - help="path to dataset folder", - default=os.path.join(os.getenv("RAFT_HOME"), - "bench", "ann", "data") - ) - - args, result_filepaths = parser.parse_known_args() - - # if nothing is provided - if len(result_filepaths) == 0: - raise ValueError("No filepaths to results were provided") - - groundtruth_filepath = os.path.join(args.dataset_path, args.dataset, - "groundtruth.neighbors.ibin") - export_results(args.output, args.recompute, groundtruth_filepath, - result_filepath) - - -if __name__ == "__main__": - main() diff --git a/bench/ann/plot.py b/bench/ann/plot.py index 0020e398a9..33a1872fe0 100644 --- a/bench/ann/plot.py +++ b/bench/ann/plot.py @@ -192,25 +192,38 @@ def inv_fun(x): plt.close() -def load_all_results(result_filepath): +def load_all_results(dataset_path): results = dict() - with open(result_filepath, 'r') as f: - for line in f.readlines()[1:]: - split_lines = line.split(',') - algo_name = split_lines[0].split('.')[0] - if algo_name not in results: - results[algo_name] = [] - results[algo_name].append([algo_name, float(split_lines[1]), - float(split_lines[2])]) + results_path = os.path.join(dataset_path, "result", "search") + for result_filepath in os.listdir(results_path): + with open(os.path.join(results_path, result_filepath), 'r') as f: + lines = f.readlines() + idx = 0 + for pos, line in enumerate(lines): + if "QPS" in line: + idx = pos + break + + for line in lines[idx+1:]: + split_lines = line.split(',') + algo_name = split_lines[0].split('.')[0].strip("\"") + if algo_name not in results: + results[algo_name] = [] + results[algo_name].append([algo_name, float(split_lines[12]), + float(split_lines[10])]) return results def main(): parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument("--result-csv", help="Path to CSV Results", required=True) - parser.add_argument("--output", help="Path to the PNG output file", - default=f"{os.getcwd()}/out.png") + parser.add_argument("--dataset", help="dataset to download", + default="glove-100-inner") + parser.add_argument("--dataset-path", help="path to dataset folder", + default=os.path.join(os.getenv("RAFT_HOME"), + "bench", "ann", "data")) + parser.add_argument("--output-filename", + default="plot.png") parser.add_argument( "--x-scale", 
help="Scale to use when drawing the X-axis. \ @@ -228,12 +241,13 @@ def main(): ) args = parser.parse_args() - print(f"writing output to {args.output}") + output_filepath = os.path.join(args.dataset_path, args.dataset, args.output_filename) + print(f"writing output to {output_filepath}") - results = load_all_results(args.result_csv) + results = load_all_results(os.path.join(args.dataset_path, args.dataset)) linestyles = create_linestyles(sorted(results.keys())) - create_plot(results, args.raw, args.x_scale, args.y_scale, args.output, linestyles) + create_plot(results, args.raw, args.x_scale, args.y_scale, output_filepath, linestyles) if __name__ == "__main__": diff --git a/bench/ann/run.py b/bench/ann/run.py index 8da3eadc3b..5c927d5066 100644 --- a/bench/ann/run.py +++ b/bench/ann/run.py @@ -124,6 +124,7 @@ def main(): parser.add_argument( "--dataset", help="dataset whose configuration file will be used", + default="glove-100-inner" ) parser.add_argument( "--dataset-path", @@ -232,8 +233,6 @@ def main(): index["file"] = os.path.join(dataset_path, dataset_name, "index", index["name"]) executables_to_run[executable_path]["index"][pos] = index - print(executables_to_run) - run_build_and_search(conf_file, conf_filename, conf_filedir, executables_to_run, dataset_path, args.force, build, search, diff --git a/docs/source/raft_ann_benchmarks.md b/docs/source/raft_ann_benchmarks.md index 67e04ff518..78ff2d96a1 100644 --- a/docs/source/raft_ann_benchmarks.md +++ b/docs/source/raft_ann_benchmarks.md @@ -21,11 +21,10 @@ Please see the [build instructions](ann_benchmarks_build.md) to build the benchm ## Running the benchmarks ### Usage -There are 4 general steps to running the benchmarks and vizualizing the results: +There are 3 general steps to running the benchmarks and vizualizing the results: 1. Prepare Dataset 2. Build Index and Search Index -3. Evaluate Results -4. Plot Results +3. Plot Results We provide a collection of lightweight Python scripts that are wrappers over lower level scripts and executables to run our benchmarks. Either Python scripts or @@ -47,11 +46,8 @@ python bench/ann/get_dataset.py --dataset deep-image-96-angular --normalize # (2) build and search index python bench/ann/run.py --dataset deep-image-96-inner -# (3) evaluate results -python bench/ann/data_export.py --output out.csv --dataset deep-image-96-inner - -# (4) plot results -python bench/ann/plot.py --result-csv out.csv +# (3) plot results +python bench/ann/plot.py --dataset deep-image-96-inner ``` Configuration files already exist for the following list of the million-scale datasets. These all work out-of-the-box with the `--dataset` argument. Other million-scale datasets from `ann-benchmarks.com` will work, but will require a json configuration file to be created in `bench/ann/conf`. @@ -86,11 +82,8 @@ python bench/ann/split_groundtruth.py --groundtruth bench/ann/data/deep-1B/deep_ # (2) build and search index python bench/ann/run.py --dataset deep-1B -# (3) evaluate results -python bench/ann/data_export.py --output out.csv --dataset deep-1B - -# (4) plot results -python bench/ann/plot.py --result-csv out.csv +# (3) plot results +python bench/ann/plot.py --dataset deep-1B ``` The usage of `bench/ann/split-groundtruth.py` is: @@ -119,6 +112,7 @@ options: path to download dataset (default: ${RAFT_HOME}/bench/ann/data) --normalize normalize cosine distance to inner product (default: False) ``` + When option `normalize` is provided to the script, any dataset that has cosine distances will be normalized to inner product. 
So, for example, the dataset `glove-100-angular` will be written at location `${RAFT_HOME}/bench/ann/data/glove-100-inner/`. @@ -140,13 +134,15 @@ available in `raft/cpp/build/`. The usage of the script `bench/ann/run.py` is: ```bash -usage: run.py [-h] [--configuration CONFIGURATION] [--dataset DATASET] [--build] [--search] [--algorithms ALGORITHMS] [--indices INDICES] [-f] - -options: -usage: run.py [-h] [--configuration CONFIGURATION] [--dataset DATASET] [--dataset-path DATASET_PATH] [--build] [--search] [--algorithms ALGORITHMS] [--indices INDICES] [-f] +usage: run.py [-h] [-k COUNT] [-bs BATCH_SIZE] [--configuration CONFIGURATION] [--dataset DATASET] [--dataset-path DATASET_PATH] [--build] [--search] [--algorithms ALGORITHMS] [--indices INDICES] + [-f] options: -h, --help show this help message and exit + -k COUNT, --count COUNT + the number of nearest neighbors to search for (default: 10) + -bs BATCH_SIZE, --batch-size BATCH_SIZE + number of query vectors to use in each query trial (default: 10000) --configuration CONFIGURATION path to configuration file for a dataset (default: None) --dataset DATASET dataset whose configuration file will be used (default: glove-100-inner) @@ -157,14 +153,15 @@ options: --algorithms ALGORITHMS run only comma separated list of named algorithms (default: None) --indices INDICES run only comma separated list of named indices. parameter `algorithms` is ignored (default: None) - -k, --count number of nearest neighbors to return - --batch-size number of query vectors to pass into search -f, --force re-run algorithms even if their results already exist (default: False) ``` + `configuration` and `dataset` : `configuration` is a path to a configuration file for a given dataset. The configuration file should be name as `.json`. It is optional if the name of the dataset is provided with the `dataset` argument, in which case -a configuration file will be searched for as `${RAFT_HOME}/bench/ann/conf/.json` +a configuration file will be searched for as `${RAFT_HOME}/bench/ann/conf/.json`. +For every algorithm run by this script, it outputs an index build statistics CSV file in `/build/ +and an index search statistics CSV file in `/search/. `dataset-path` : 1. data is read from `/` @@ -177,45 +174,26 @@ it is assumed both are `True`. `indices` and `algorithms` : these parameters ensure that the algorithm specified for an index is available in `algos.yaml` and not disabled, as well as having an associated executable. -#### Step 3: Evaluating Results -The script `bench/ann/data_export.py` will evaluate results for a dataset whose index has been built -and searched with at least one algorithm. For every result file that is available to the script, the output -will be combined and written to a CSV file. +#### Step 3: Plot Results +The script `bench/ann/plot.py` will plot results for all algorithms found in index search statistics +CSV file in `/search/. 
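As a rough sketch of how those per-algorithm search CSVs can be consumed (added for illustration, not part of the patch), the snippet below picks the recall and QPS columns by header name, the same approach a later commit in this series adopts in `bench/ann/plot.py`; the example path is hypothetical, and it assumes the header row is the first line of the file (the patched `plot.py` instead scans for the line containing "QPS" to skip any gbench preamble).

```python
# Sketch only: extract (algorithm, recall, QPS) triples from one gbench search CSV.
import csv

def read_search_csv(path):
    with open(path, newline="") as f:
        rows = list(csv.reader(f))
    header, body = rows[0], rows[1:]
    # locate the columns by name rather than by fixed position
    recall_idx = next(i for i, col in enumerate(header) if "Recall" in col)
    qps_idx = next(i for i, col in enumerate(header) if "QPS" in col)
    return [
        (row[0].split(".")[0],          # benchmark name -> algorithm name
         float(row[recall_idx]),
         float(row[qps_idx]))
        for row in body
    ]

# hypothetical example:
# read_search_csv("bench/ann/data/sift-128-euclidean/result/search/raft_cagra.csv")
```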
The usage of this script is: ```bash -usage: data_export.py [-h] --output OUTPUT [--recompute] [--dataset DATASET] [--dataset-path DATASET_PATH] +usage: plot.py [-h] [--dataset DATASET] [--dataset-path DATASET_PATH] [--output-filename OUTPUT_FILENAME] [--x-scale X_SCALE] [--y-scale {linear,log,symlog,logit}] [--raw] options: -h, --help show this help message and exit - --output OUTPUT Path to the CSV output file (default: None) - --recompute Recompute metrics (default: False) - --dataset DATASET Name of the dataset to export results for (default: glove-100-inner) + --dataset DATASET dataset to download (default: glove-100-inner) --dataset-path DATASET_PATH path to dataset folder (default: ${RAFT_HOME}/bench/ann/data) -``` - -#### Step 4: Plot Results -The script `bench/ann/plot.py` will plot all results evaluated to a CSV file for a given dataset. - -The usage of this script is: -```bash -usage: plot.py [-h] --result_csv RESULT_CSV [--output OUTPUT] [--x-scale X_SCALE] [--y-scale {linear,log,symlog,logit}] [--raw] - -options: - -h, --help show this help message and exit - --result-csv RESULT_CSV - Path to CSV Results (default: None) - --output OUTPUT Path to the PNG output file (default: ${RAFT_HOME}/out.png) + --output-filename OUTPUT_FILENAME --x-scale X_SCALE Scale to use when drawing the X-axis. Typically linear, logit or a2 (default: linear) --y-scale {linear,log,symlog,logit} Scale to use when drawing the Y-axis (default: linear) --raw Show raw results (not just Pareto frontier) in faded colours (default: False) ``` -All algorithms present in the CSV file supplied to this script with parameter `result_csv` -will appear in the plot. - The figure below is the resulting plot of running our benchmarks as of August 2023 for a batch size of 10, on an NVIDIA H100 GPU and an Intel Xeon Platinum 8480CL CPU. It presents the throughput (in Queries-Per-Second) performance for every level of recall. ![Throughput vs recall plot comparing popular ANN algorithms with RAFT's at batch size 10](../../img/raft-vector-search-batch-10.png) From 902f9f48b34e397c6846d05b0f52016932e4537f Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 25 Aug 2023 18:51:34 -0700 Subject: [PATCH 29/70] fix typo in docs path for results --- docs/source/raft_ann_benchmarks.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/raft_ann_benchmarks.md b/docs/source/raft_ann_benchmarks.md index 78ff2d96a1..757e9a59b5 100644 --- a/docs/source/raft_ann_benchmarks.md +++ b/docs/source/raft_ann_benchmarks.md @@ -160,8 +160,8 @@ options: The configuration file should be name as `.json`. It is optional if the name of the dataset is provided with the `dataset` argument, in which case a configuration file will be searched for as `${RAFT_HOME}/bench/ann/conf/.json`. -For every algorithm run by this script, it outputs an index build statistics CSV file in `/build/ -and an index search statistics CSV file in `/search/. +For every algorithm run by this script, it outputs an index build statistics CSV file in `/result/build/ +and an index search statistics CSV file in `/result/search/. `dataset-path` : 1. data is read from `/` @@ -176,7 +176,7 @@ is available in `algos.yaml` and not disabled, as well as having an associated e #### Step 3: Plot Results The script `bench/ann/plot.py` will plot results for all algorithms found in index search statistics -CSV file in `/search/. +CSV file in `/search/result/. 
The usage of this script is: ```bash From 1198e1abdbc2d080f7dd6e8c6185666800633384 Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 25 Aug 2023 18:56:44 -0700 Subject: [PATCH 30/70] for plotting, pick up recall/qps from anywhere in the csv columns --- bench/ann/plot.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/bench/ann/plot.py b/bench/ann/plot.py index 33a1872fe0..80433b0de9 100644 --- a/bench/ann/plot.py +++ b/bench/ann/plot.py @@ -204,13 +204,21 @@ def load_all_results(dataset_path): idx = pos break + keys = lines[idx].split(',') + recall_idx = -1 + qps_idx = -1 + for pos, key in enumerate(keys): + if "Recall" in key: + recall_idx = pos + if "QPS" in key: + qps_idx = pos for line in lines[idx+1:]: split_lines = line.split(',') algo_name = split_lines[0].split('.')[0].strip("\"") if algo_name not in results: results[algo_name] = [] - results[algo_name].append([algo_name, float(split_lines[12]), - float(split_lines[10])]) + results[algo_name].append([algo_name, float(split_lines[recall_idx]), + float(split_lines[qps_idx])]) return results From 3f647c325c8d8906fc583ce0efa2faff5f525f9a Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 25 Aug 2023 19:38:18 -0700 Subject: [PATCH 31/70] add output-filepath for plot.py --- bench/ann/plot.py | 7 ++++--- docs/source/raft_ann_benchmarks.md | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/bench/ann/plot.py b/bench/ann/plot.py index 80433b0de9..c8fe947684 100644 --- a/bench/ann/plot.py +++ b/bench/ann/plot.py @@ -230,8 +230,9 @@ def main(): parser.add_argument("--dataset-path", help="path to dataset folder", default=os.path.join(os.getenv("RAFT_HOME"), "bench", "ann", "data")) - parser.add_argument("--output-filename", - default="plot.png") + parser.add_argument("--output-filepath", + help="directory for PNG to be saved", + default=os.getcwd()) parser.add_argument( "--x-scale", help="Scale to use when drawing the X-axis. \ @@ -249,7 +250,7 @@ def main(): ) args = parser.parse_args() - output_filepath = os.path.join(args.dataset_path, args.dataset, args.output_filename) + output_filepath = os.path.join(args.output_filepath, args.dataset + ".png") print(f"writing output to {output_filepath}") results = load_all_results(os.path.join(args.dataset_path, args.dataset)) diff --git a/docs/source/raft_ann_benchmarks.md b/docs/source/raft_ann_benchmarks.md index 757e9a59b5..518c86a27c 100644 --- a/docs/source/raft_ann_benchmarks.md +++ b/docs/source/raft_ann_benchmarks.md @@ -180,14 +180,15 @@ CSV file in `/search/result/. The usage of this script is: ```bash -usage: plot.py [-h] [--dataset DATASET] [--dataset-path DATASET_PATH] [--output-filename OUTPUT_FILENAME] [--x-scale X_SCALE] [--y-scale {linear,log,symlog,logit}] [--raw] +usage: plot.py [-h] [--dataset DATASET] [--dataset-path DATASET_PATH] [--output-filepath OUTPUT_FILEPATH] [--x-scale X_SCALE] [--y-scale {linear,log,symlog,logit}] [--raw] options: -h, --help show this help message and exit --dataset DATASET dataset to download (default: glove-100-inner) --dataset-path DATASET_PATH - path to dataset folder (default: ${RAFT_HOME}/bench/ann/data) - --output-filename OUTPUT_FILENAME + path to dataset folder (default: /home/nfs/dgala/raft/bench/ann/data) + --output-filepath OUTPUT_FILEPATH + directory for PNG to be saved (default: os.getcwd()) --x-scale X_SCALE Scale to use when drawing the X-axis. 
Typically linear, logit or a2 (default: linear) --y-scale {linear,log,symlog,logit} Scale to use when drawing the Y-axis (default: linear) From 354287db773ee796698a9b6a2f27bf39004a2ec2 Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 25 Aug 2023 19:39:45 -0700 Subject: [PATCH 32/70] fix typo in docs --- docs/source/raft_ann_benchmarks.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/raft_ann_benchmarks.md b/docs/source/raft_ann_benchmarks.md index 518c86a27c..29187e77e2 100644 --- a/docs/source/raft_ann_benchmarks.md +++ b/docs/source/raft_ann_benchmarks.md @@ -186,7 +186,7 @@ options: -h, --help show this help message and exit --dataset DATASET dataset to download (default: glove-100-inner) --dataset-path DATASET_PATH - path to dataset folder (default: /home/nfs/dgala/raft/bench/ann/data) + path to dataset folder (default: ${RAFT_HOME}/bench/ann/data) --output-filepath OUTPUT_FILEPATH directory for PNG to be saved (default: os.getcwd()) --x-scale X_SCALE Scale to use when drawing the X-axis. Typically linear, logit or a2 (default: linear) From e0dfbab91590305c6f94c80ee1c57d5b0d11ffcc Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Sat, 26 Aug 2023 08:11:32 -0700 Subject: [PATCH 33/70] Reverting changes to deep-100M --- .gitignore | 1 + bench/ann/conf/deep-100M.json | 247 +++++++++++++++++++++++++++++++++- bench/ann/plot.py | 2 + 3 files changed, 248 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 7eb29cbcb7..7939fc1622 100644 --- a/.gitignore +++ b/.gitignore @@ -26,6 +26,7 @@ dask-worker-space/ *.egg-info/ *.bin bench/ann/data +temporary_*.json ## scikit-build _skbuild diff --git a/bench/ann/conf/deep-100M.json b/bench/ann/conf/deep-100M.json index 433cb80ebf..f95df5e965 100644 --- a/bench/ann/conf/deep-100M.json +++ b/bench/ann/conf/deep-100M.json @@ -1,9 +1,9 @@ { "dataset": { "name": "deep-100M", - "base_file": "data/deep-1B/base.1B.fbin", + "base_file": "deep-100M/base.1B.fbin", "subset_size": 100000000, - "query_file": "data/deep-1B/query.public.10K.fbin", + "query_file": "deep-100M/query.public.10K.fbin", "groundtruth_neighbors_file": "deep-100M/groundtruth.neighbors.ibin", "distance": "euclidean" }, @@ -182,6 +182,23 @@ {"nprobe":1000} ] }, + { + "name": "faiss_ivf_pq.M48-nlist200K", + "algo": "faiss_gpu_ivf_pq", + "build_param": {"nlist":200000, "M":48}, + "file": "deep-100M/faiss_ivf_pq/M48-nlist200K", + "search_params": [ + {"nprobe":20}, + {"nprobe":30}, + {"nprobe":40}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ] + }, + { "name": "raft_ivf_flat.nlist50K", @@ -216,6 +233,232 @@ {"max_batch":10000, "max_k":10, "nprobe":1000} ] }, + { + "name": "raft_ivf_flat.nlist200K", + "algo": "raft_ivf_flat", + "dataset_memtype":"host", + "build_param": {"nlist": 200000, "niter": 25, "ratio": 5}, + "file": "deep-100M/raft_ivf_flat/nlist200K", + "search_params": [ + {"max_batch":10000, "max_k":10, "nprobe":20}, + {"max_batch":10000, "max_k":10, "nprobe":30}, + {"max_batch":10000, "max_k":10, "nprobe":40}, + {"max_batch":10000, "max_k":10, "nprobe":50}, + {"max_batch":10000, "max_k":10, "nprobe":100}, + {"max_batch":10000, "max_k":10, "nprobe":200}, + {"max_batch":10000, "max_k":10, "nprobe":500}, + {"max_batch":10000, "max_k":10, "nprobe":1000} + ] + }, + + { + "name": "raft_ivf_pq.dimpq128-cluster1024", + "algo": "raft_ivf_pq", + "dataset_memtype": "device", + "build_param": {"nlist": 1024, "pq_dim": 128, "ratio": 1, "niter": 25 + }, + "file": 
"index/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024", + "search_params": [ + {"nprobe": 10, "internalDistanceDtype": "half", "smemLutDtype": "half"}, + {"nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half"}, + {"nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half"}, + {"nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half"}, + {"nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half"}, + {"nprobe": 1024, "internalDistanceDtype": "half", "smemLutDtype": "half"} + ], + "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", + "algo": "raft_ivf_pq", + "dataset_memtype": "device", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-float-float", + "search_params": [ + {"nprobe": 1, "internalDistanceDtype": "float", "smemLutDtype": "float"}, + {"nprobe": 5, "internalDistanceDtype": "float", "smemLutDtype": "float"}, + {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "float"}, + {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float"}, + {"nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float"}, + {"nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float"}, + {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float"}, + {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "float"} + ], + "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-float-float" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", + "algo": "raft_ivf_pq", + "dataset_memtype": "device", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-float-half", + "search_params": [ + {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "half"}, + {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "half"}, + {"nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "half"}, + {"nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "half"}, + {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "half"}, + {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "half"} + ], + "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-float-half" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "dataset_memtype": "device", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8", + "search_params": [ + {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, + {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, + {"nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, + {"nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, + {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, + {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8"} + ], + "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "dataset_memtype": "device", + "build_param": 
{ + "nlist": 1024, + "pq_dim": 64, + "ratio": 1, + "niter": 25 + }, + "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8", + "search_params": [ + {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, + {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, + {"nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, + {"nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, + {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, + {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8"} + ], + "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", + "algo": "raft_ivf_pq", + "dataset_memtype": "device", + "build_param": { + "nlist": 1024, + "pq_dim": 64, + "ratio": 1, + "niter": 25 + }, + "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq64-cluster1024-float-half", + "search_params": [ + {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "half"}, + {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "half"}, + {"nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "half"}, + {"nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "half"}, + {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "half"}, + {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "half"} + ], + "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq64-cluster1024-float-half" + }, + { + "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "dataset_memtype": "device", + "build_param": { + "nlist": 1024, + "pq_dim": 32, + "ratio": 1, + "niter": 25 + }, + "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8", + "search_params": [ + {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, + {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, + {"nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, + {"nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, + {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, + {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8"} + ], + "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", + "algo": "raft_ivf_pq", + "dataset_memtype": "device", + "build_param": { + "nlist": 1024, + "pq_dim": 16, + "ratio": 1, + "niter": 25 + }, + "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8", + "search_params": [ + {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, + {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, + {"nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, + {"nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, + {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, + {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8"} + ], + "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8" + }, + { + "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", + "algo": "raft_ivf_pq", + "dataset_memtype": "device", + "build_param": { + "nlist": 1024, + "pq_dim": 128, + "ratio": 1, + "niter": 25 + }, + "file": 
"index/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-half-float", + "search_params": [ + {"nprobe": 10, "internalDistanceDtype": "half", "smemLutDtype": "float"}, + {"nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "float"}, + {"nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "float"}, + {"nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "float"}, + {"nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "float"}, + {"nprobe": 1024, "internalDistanceDtype": "half", "smemLutDtype": "float"} + ], + "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-half-float" + }, + { + "name": "raft_ivf_pq.dimpq512-cluster1024-float-float", + "algo": "raft_ivf_pq", + "dataset_memtype": "device", + "build_param": { + "nlist": 1024, + "pq_dim": 512, + "ratio": 1, + "niter": 25 + }, + "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq512-cluster1024-float-float", + "search_params": [ + {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "float"}, + {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float"}, + {"nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float"}, + {"nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float"}, + {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float"}, + {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "float"} + ], + "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq512-cluster1024-float-float" + }, { "name": "raft_cagra.dim32", "algo": "raft_cagra", diff --git a/bench/ann/plot.py b/bench/ann/plot.py index 80433b0de9..099c7c90f9 100644 --- a/bench/ann/plot.py +++ b/bench/ann/plot.py @@ -212,8 +212,10 @@ def load_all_results(dataset_path): recall_idx = pos if "QPS" in key: qps_idx = pos + for line in lines[idx+1:]: split_lines = line.split(',') + algo_name = split_lines[0].split('.')[0].strip("\"") if algo_name not in results: results[algo_name] = [] From 16e233b604ecc79e192fbe67f07987be5365da93 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Sun, 27 Aug 2023 15:29:04 -0500 Subject: [PATCH 34/70] FIX typo in build.sh --- build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sh b/build.sh index 628332b6e5..0319c0e3dc 100755 --- a/build.sh +++ b/build.sh @@ -422,7 +422,7 @@ if (( ${NUMARGS} == 0 )) || hasArg libraft || hasArg docs || hasArg tests || has -DBUILD_TESTS=${BUILD_TESTS} \ -DBUILD_PRIMS_BENCH=${BUILD_PRIMS_BENCH} \ -DBUILD_ANN_BENCH=${BUILD_ANN_BENCH} \ - -DCPU_ONLY=${CPU_ONLY} + -DCPU_ONLY=${CPU_ONLY} \ -DCMAKE_MESSAGE_LOG_LEVEL=${CMAKE_LOG_LEVEL} \ ${CACHE_ARGS} \ ${EXTRA_CMAKE_ARGS} From cac89d0078c202d74f22980ef52d0a59a1dc852c Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Sun, 27 Aug 2023 15:56:05 -0500 Subject: [PATCH 35/70] DBG Make cmake verbose --- conda/recipes/libraft/build_libraft.sh | 2 +- conda/recipes/libraft/build_libraft_headers.sh | 2 +- conda/recipes/libraft/build_libraft_template.sh | 2 +- conda/recipes/libraft/build_libraft_tests.sh | 2 +- conda/recipes/raft-ann-bench/build.sh | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/conda/recipes/libraft/build_libraft.sh b/conda/recipes/libraft/build_libraft.sh index 7d4173e8bb..71e1533893 100644 --- a/conda/recipes/libraft/build_libraft.sh +++ b/conda/recipes/libraft/build_libraft.sh @@ -1,4 +1,4 @@ #!/usr/bin/env bash # Copyright (c) 2022-2023, NVIDIA CORPORATION. 
-./build.sh libraft --allgpuarch --compile-lib --build-metrics=compile_lib --incl-cache-stats --no-nvtx +./build.sh libraft -v --allgpuarch --compile-lib --build-metrics=compile_lib --incl-cache-stats --no-nvtx diff --git a/conda/recipes/libraft/build_libraft_headers.sh b/conda/recipes/libraft/build_libraft_headers.sh index cc3b840e43..330ac92ff3 100644 --- a/conda/recipes/libraft/build_libraft_headers.sh +++ b/conda/recipes/libraft/build_libraft_headers.sh @@ -1,4 +1,4 @@ #!/usr/bin/env bash # Copyright (c) 2022-2023, NVIDIA CORPORATION. -./build.sh libraft --allgpuarch --no-nvtx +./build.sh libraft -v --allgpuarch --no-nvtx diff --git a/conda/recipes/libraft/build_libraft_template.sh b/conda/recipes/libraft/build_libraft_template.sh index bd7719af76..974b0a5b58 100644 --- a/conda/recipes/libraft/build_libraft_template.sh +++ b/conda/recipes/libraft/build_libraft_template.sh @@ -2,4 +2,4 @@ # Copyright (c) 2022-2023, NVIDIA CORPORATION. # Just building template so we verify it uses libraft.so and fail if it doesn't build -./build.sh template +./build.sh template -v diff --git a/conda/recipes/libraft/build_libraft_tests.sh b/conda/recipes/libraft/build_libraft_tests.sh index 05a2b59eb0..08f0d33485 100644 --- a/conda/recipes/libraft/build_libraft_tests.sh +++ b/conda/recipes/libraft/build_libraft_tests.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash # Copyright (c) 2022-2023, NVIDIA CORPORATION. -./build.sh tests bench-prims --allgpuarch --no-nvtx --build-metrics=tests_bench_prims --incl-cache-stats +./build.sh tests bench-prims -v --allgpuarch --no-nvtx --build-metrics=tests_bench_prims --incl-cache-stats cmake --install cpp/build --component testing diff --git a/conda/recipes/raft-ann-bench/build.sh b/conda/recipes/raft-ann-bench/build.sh index 00078792a1..9c411774b6 100644 --- a/conda/recipes/raft-ann-bench/build.sh +++ b/conda/recipes/raft-ann-bench/build.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash # Copyright (c) 2023, NVIDIA CORPORATION. -./build.sh bench-ann --allgpuarch --no-nvtx --build-metrics=bench_ann --incl-cache-stats +./build.sh bench-ann -v --allgpuarch --no-nvtx --build-metrics=bench_ann --incl-cache-stats cmake --install cpp/build --component ann_bench From 7d8ee1318554358149f53fc6ac8c408a7197fd3a Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Mon, 28 Aug 2023 12:02:15 -0400 Subject: [PATCH 36/70] FAISS refinement --- cpp/bench/ann/src/faiss/faiss_benchmark.cu | 1 + cpp/bench/ann/src/faiss/faiss_wrapper.h | 11 ++++++++++- docs/source/ann_benchmarks_param_tuning.md | 15 ++++++++------- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/cpp/bench/ann/src/faiss/faiss_benchmark.cu b/cpp/bench/ann/src/faiss/faiss_benchmark.cu index 619565d107..231154ccfd 100644 --- a/cpp/bench/ann/src/faiss/faiss_benchmark.cu +++ b/cpp/bench/ann/src/faiss/faiss_benchmark.cu @@ -68,6 +68,7 @@ void parse_search_param(const nlohmann::json& conf, typename raft::bench::ann::FaissGpu::SearchParam& param) { param.nprobe = conf.at("nprobe"); + if (conf.contains("refine_ratio")) { param.refine_ratio = conf.at("refine_ratio"); } } template class Algo> diff --git a/cpp/bench/ann/src/faiss/faiss_wrapper.h b/cpp/bench/ann/src/faiss/faiss_wrapper.h index 7a3f91853f..ec80e6cbfd 100644 --- a/cpp/bench/ann/src/faiss/faiss_wrapper.h +++ b/cpp/bench/ann/src/faiss/faiss_wrapper.h @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -81,6 +82,7 @@ class FaissGpu : public ANN { using typename ANN::AnnSearchParam; struct SearchParam : public AnnSearchParam { int nprobe; + float refine_ratio = 1.0; }; FaissGpu(Metric metric, int dim, int nlist); @@ -123,6 +125,7 @@ class FaissGpu : public ANN { mutable faiss::gpu::StandardGpuResources gpu_resource_; std::unique_ptr index_; + std::unique_ptr index_refine_; faiss::MetricType metric_type_; int nlist_; int device_; @@ -154,9 +157,15 @@ void FaissGpu::build(const T* dataset, size_t nrow, cudaStream_t stream) template void FaissGpu::set_search_param(const AnnSearchParam& param) { - int nprobe = dynamic_cast(param).nprobe; + auto search_param = dynamic_cast(param); + int nprobe = search_param.nprobe; assert(nprobe <= nlist_); dynamic_cast(index_.get())->setNumProbes(nprobe); + + if (search_param.refine_ratio > 1.0) { + this->index_refine_ = std::make_unique(this->index_.get()); + this->index_refine_.get()->k_factor = search_param.refine_ratio; + } } template diff --git a/docs/source/ann_benchmarks_param_tuning.md b/docs/source/ann_benchmarks_param_tuning.md index f5801322d3..40151b1fad 100644 --- a/docs/source/ann_benchmarks_param_tuning.md +++ b/docs/source/ann_benchmarks_param_tuning.md @@ -55,13 +55,14 @@ IVF-flat is a simple algorithm which won't save any space, but it provides compe IVF-pq is an inverted-file index, which partitions the vectors into a series of clusters, or lists, in a similar way to IVF-flat above. The difference is that IVF-PQ uses product quantization to also compress the vectors, giving the index a smaller memory footprint. Unfortunately, higher levels of compression can also shrink recall, which a refinement step can improve when the original vectors are still available. -| Parameter | Type | Required | Data Type | Default | Description | -|-------------------------|----------------|----------|----------------------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `nlists` | `build_param` | Y | Positive Integer >0 | | Number of clusters to partition the vectors into. Larger values will put less points into each cluster but this will impact index build time as more clusters need to be trained. 
| -| `M` | `build_param` | Y | Positive Integer Power of 2 [8-64] | | Number of chunks or subquantizers for each vector. | -| `usePrecomputed` | `build_param` | N | Boolean. Default=`false` | `false` | Use pre-computed lookup tables to speed up search at the cost of increased memory usage. | -| `useFloat16` | `build_param` | N | Boolean. Default=`false` | `false` | Use half-precision floats for clustering step. | -| `nprobe` | `search_params` | Y | Positive Integer >0 | | The closest number of clusters to search for each query vector. Larger values will improve recall but will search more points in the index. | +| Parameter | Type | Required | Data Type | Default | Description | +|------------------|----------------|----------|----------------------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `nlists` | `build_param` | Y | Positive Integer >0 | | Number of clusters to partition the vectors into. Larger values will put less points into each cluster but this will impact index build time as more clusters need to be trained. | +| `M` | `build_param` | Y | Positive Integer Power of 2 [8-64] | | Number of chunks or subquantizers for each vector. | +| `usePrecomputed` | `build_param` | N | Boolean. Default=`false` | `false` | Use pre-computed lookup tables to speed up search at the cost of increased memory usage. | +| `useFloat16` | `build_param` | N | Boolean. Default=`false` | `false` | Use half-precision floats for clustering step. | +| `numProbes` | `search_params` | Y | Positive Integer >0 | | The closest number of clusters to search for each query vector. Larger values will improve recall but will search more points in the index. | +| `refine_ratio` | `search_params` | N| Positive Number >=0 | 0 | `refine_ratio * k` nearest neighbors are queried from the index initially and an additional refinement step improves recall by selecting only the best `k` neighbors. 
| From c0ee3238adeef899a1e1f600a097a1c1dfd345bd Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Mon, 28 Aug 2023 13:20:48 -0500 Subject: [PATCH 37/70] FIX typo in build.sh --- build.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/build.sh b/build.sh index 0319c0e3dc..3ae6c338db 100755 --- a/build.sh +++ b/build.sh @@ -499,8 +499,7 @@ fi # Build and (optionally) install the raft-ann-bench Python package if (( ${NUMARGS} == 0 )) || hasArg raft-dask; then SKBUILD_CONFIGURE_OPTIONS="${SKBUILD_EXTRA_CMAKE_ARGS}" \ - SKBUILD_BUILD_OPTIONS="-j${PARALLEL_LEVEL}" \ - python -m pip install --no-build-isolation --no-deps ${REPODIR}/python/raft-dask + python -m pip install --no-build-isolation --no-deps ${REPODIR}/python/raft-ann-bench fi if hasArg docs; then From aa608d2b90be4a73dcec2289909153e819612fd9 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Mon, 28 Aug 2023 15:02:47 -0500 Subject: [PATCH 38/70] DBG single commit of changes --- .gitignore | 1 + bench/ann/conf/bigann-100M.json | 206 --- bench/ann/conf/deep-100M.json | 859 ----------- bench/ann/conf/deep-1B.json | 38 - bench/ann/conf/sift-128-euclidean.json | 1366 ----------------- bench/ann/data_export.py | 64 - build.sh | 17 +- ci/build_python.sh | 19 +- .../all_cuda-118_arch-x86_64.yaml | 6 +- .../all_cuda-120_arch-x86_64.yaml | 6 +- conda/recipes/libraft/build_libraft.sh | 2 +- .../recipes/libraft/build_libraft_headers.sh | 2 +- .../recipes/libraft/build_libraft_template.sh | 2 +- conda/recipes/libraft/build_libraft_tests.sh | 2 +- conda/recipes/libraft/meta.yaml | 59 - .../build_raft_nn_bench_cpu.sh} | 2 +- conda/recipes/raft-ann-bench-cpu/meta.yaml | 65 + conda/recipes/raft-ann-bench/build.sh | 5 + .../raft-ann-bench/conda_build_config.yaml | 73 + conda/recipes/raft-ann-bench/meta.yaml | 96 ++ conda/recipes/raft-dask/meta.yaml | 6 +- cpp/CMakeLists.txt | 58 +- cpp/bench/ann/CMakeLists.txt | 124 +- cpp/bench/ann/scripts/eval.pl | 430 ------ cpp/bench/ann/src/common/ann_types.hpp | 73 +- cpp/bench/ann/src/common/benchmark.cpp | 110 ++ cpp/bench/ann/src/common/benchmark.hpp | 934 ++++++----- cpp/bench/ann/src/common/benchmark_util.hpp | 33 - cpp/bench/ann/src/common/conf.cpp | 2 +- cpp/bench/ann/src/common/conf.hpp | 158 ++ cpp/bench/ann/src/common/cuda_stub.hpp | 159 ++ .../ann/src/common/{dataset.h => dataset.hpp} | 89 +- cpp/bench/ann/src/common/util.cpp | 68 - cpp/bench/ann/src/common/util.h | 79 - cpp/bench/ann/src/common/util.hpp | 347 +++++ cpp/bench/ann/src/faiss/faiss_benchmark.cu | 12 +- cpp/bench/ann/src/faiss/faiss_wrapper.h | 15 +- cpp/bench/ann/src/ggnn/ggnn_benchmark.cu | 14 +- cpp/bench/ann/src/ggnn/ggnn_wrapper.cuh | 18 +- .../ann/src/hnswlib/hnswlib_benchmark.cpp | 21 +- cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h | 7 +- cpp/bench/ann/src/raft/raft_benchmark.cu | 42 +- cpp/bench/ann/src/raft/raft_cagra_wrapper.h | 65 +- .../ann/src/raft/raft_ivf_flat_wrapper.h | 44 +- cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h | 51 +- cpp/bench/ann/src/raft/raft_wrapper.h | 7 +- dependencies.yaml | 28 +- docs/source/ann_benchmarks_low_level.md | 265 ++-- docs/source/ann_benchmarks_param_tuning.md | 13 + docs/source/raft_ann_benchmarks.md | 58 +- python/raft-ann-bench/LICENSE | 1 + python/raft-ann-bench/pyproject.toml | 57 + .../raft-ann-bench/raft-ann-bench/__init__.py | 0 .../raft-ann-bench/data_export/__main__.py | 92 ++ .../raft-ann-bench/get_dataset/__main__.py | 56 +- .../get_dataset}/fbin_to_f16bin.py | 29 +- .../get_dataset}/hdf5_to_fbin.py | 27 +- .../raft-ann-bench/plot/__main__.py | 122 
+- .../raft-ann-bench/run/__main__.py | 207 ++- .../raft-ann-bench/run/algos-cpu.yaml | 30 + .../raft-ann-bench/run}/algos.yaml | 0 .../raft-ann-bench/run/conf/bigann-100M.json | 196 +++ .../raft-ann-bench/run/conf/deep-100M.json | 286 ++++ .../raft-ann-bench/run/conf/deep-1B.json | 34 + .../run}/conf/deep-image-96-angular.json | 0 .../conf/fashion-mnist-784-euclidean.json | 0 .../run}/conf/gist-960-euclidean.json | 0 .../run}/conf/glove-100-angular.json | 0 .../run}/conf/glove-100-inner.json | 512 +++--- .../run}/conf/glove-50-angular.json | 0 .../run}/conf/lastfm-65-angular.json | 0 .../run}/conf/mnist-784-euclidean.json | 0 .../run}/conf/nytimes-256-angular.json | 0 .../run/conf/sift-128-euclidean.json | 498 ++++++ .../split_groundtruth/__main__.py | 21 +- .../split_groundtruth}/split_groundtruth.pl | 0 python/raft-dask/pyproject.toml | 4 +- 77 files changed, 3902 insertions(+), 4490 deletions(-) delete mode 100644 bench/ann/conf/bigann-100M.json delete mode 100644 bench/ann/conf/deep-100M.json delete mode 100644 bench/ann/conf/deep-1B.json delete mode 100644 bench/ann/conf/sift-128-euclidean.json delete mode 100644 bench/ann/data_export.py rename conda/recipes/{libraft/build_libraft_nn_bench.sh => raft-ann-bench-cpu/build_raft_nn_bench_cpu.sh} (55%) create mode 100644 conda/recipes/raft-ann-bench-cpu/meta.yaml create mode 100644 conda/recipes/raft-ann-bench/build.sh create mode 100644 conda/recipes/raft-ann-bench/conda_build_config.yaml create mode 100644 conda/recipes/raft-ann-bench/meta.yaml delete mode 100755 cpp/bench/ann/scripts/eval.pl create mode 100644 cpp/bench/ann/src/common/benchmark.cpp delete mode 100644 cpp/bench/ann/src/common/benchmark_util.hpp create mode 100644 cpp/bench/ann/src/common/conf.hpp create mode 100644 cpp/bench/ann/src/common/cuda_stub.hpp rename cpp/bench/ann/src/common/{dataset.h => dataset.hpp} (85%) delete mode 100644 cpp/bench/ann/src/common/util.cpp delete mode 100644 cpp/bench/ann/src/common/util.h create mode 100644 cpp/bench/ann/src/common/util.hpp create mode 100644 docs/source/ann_benchmarks_param_tuning.md create mode 120000 python/raft-ann-bench/LICENSE create mode 100644 python/raft-ann-bench/pyproject.toml create mode 100644 python/raft-ann-bench/raft-ann-bench/__init__.py create mode 100644 python/raft-ann-bench/raft-ann-bench/data_export/__main__.py rename bench/ann/get_dataset.py => python/raft-ann-bench/raft-ann-bench/get_dataset/__main__.py (62%) rename {cpp/bench/ann/scripts => python/raft-ann-bench/raft-ann-bench/get_dataset}/fbin_to_f16bin.py (57%) rename {cpp/bench/ann/scripts => python/raft-ann-bench/raft-ann-bench/get_dataset}/hdf5_to_fbin.py (78%) rename bench/ann/plot.py => python/raft-ann-bench/raft-ann-bench/plot/__main__.py (70%) rename bench/ann/run.py => python/raft-ann-bench/raft-ann-bench/run/__main__.py (55%) create mode 100644 python/raft-ann-bench/raft-ann-bench/run/algos-cpu.yaml rename {bench/ann => python/raft-ann-bench/raft-ann-bench/run}/algos.yaml (100%) create mode 100644 python/raft-ann-bench/raft-ann-bench/run/conf/bigann-100M.json create mode 100644 python/raft-ann-bench/raft-ann-bench/run/conf/deep-100M.json create mode 100644 python/raft-ann-bench/raft-ann-bench/run/conf/deep-1B.json rename {bench/ann => python/raft-ann-bench/raft-ann-bench/run}/conf/deep-image-96-angular.json (100%) rename {bench/ann => python/raft-ann-bench/raft-ann-bench/run}/conf/fashion-mnist-784-euclidean.json (100%) rename {bench/ann => python/raft-ann-bench/raft-ann-bench/run}/conf/gist-960-euclidean.json (100%) rename {bench/ann => 
python/raft-ann-bench/raft-ann-bench/run}/conf/glove-100-angular.json (100%) rename {bench/ann => python/raft-ann-bench/raft-ann-bench/run}/conf/glove-100-inner.json (56%) rename {bench/ann => python/raft-ann-bench/raft-ann-bench/run}/conf/glove-50-angular.json (100%) rename {bench/ann => python/raft-ann-bench/raft-ann-bench/run}/conf/lastfm-65-angular.json (100%) rename {bench/ann => python/raft-ann-bench/raft-ann-bench/run}/conf/mnist-784-euclidean.json (100%) rename {bench/ann => python/raft-ann-bench/raft-ann-bench/run}/conf/nytimes-256-angular.json (100%) create mode 100644 python/raft-ann-bench/raft-ann-bench/run/conf/sift-128-euclidean.json rename bench/ann/split_groundtruth.py => python/raft-ann-bench/raft-ann-bench/split_groundtruth/__main__.py (63%) rename {cpp/bench/ann/scripts => python/raft-ann-bench/raft-ann-bench/split_groundtruth}/split_groundtruth.pl (100%) diff --git a/.gitignore b/.gitignore index c2528d2cd0..7eb29cbcb7 100644 --- a/.gitignore +++ b/.gitignore @@ -25,6 +25,7 @@ log dask-worker-space/ *.egg-info/ *.bin +bench/ann/data ## scikit-build _skbuild diff --git a/bench/ann/conf/bigann-100M.json b/bench/ann/conf/bigann-100M.json deleted file mode 100644 index 82e9383d15..0000000000 --- a/bench/ann/conf/bigann-100M.json +++ /dev/null @@ -1,206 +0,0 @@ -{ - "dataset" : { - "name" : "bigann-100M", - "base_file" : "data/bigann-1B/base.1B.u8bin", - "subset_size" : 100000000, - "query_file" : "data/bigann-1B/query.public.10K.u8bin", - "distance" : "euclidean" - }, - - "search_basic_param" : { - "batch_size" : 10000, - "k" : 10, - "run_count" : 2 - }, - - "index" : [ - { - "name": "raft_ivf_pq.dimpq64-cluster5K-float-float", - "algo": "raft_ivf_pq", - "dataset_memtype": "host", - "build_param": { - "niter": 25, - "nlist": 5000, - "pq_dim": 64, - "ratio": 10 - }, - "file": "index/bigann-100M/raft_ivf_pq/dimpq64-cluster5K", - "search_params": [ - { - "numProbes": 20, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "numProbes": 30, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "numProbes": 40, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "numProbes": 1000, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/bigann-100M/raft_ivf_pq/dimpq64-cluster5K-float-float" - }, - { - "name" : "hnswlib.M12", - "algo" : "hnswlib", - "build_param": {"M":12, "efConstruction":500, "numThreads":32}, - "file" : "index/bigann-100M/hnswlib/M12", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/bigann-100M/hnswlib/M12" - }, - { - "name" : "hnswlib.M16", - "algo" : "hnswlib", - "build_param": {"M":16, "efConstruction":500, "numThreads":32}, - "file" : "index/bigann-100M/hnswlib/M16", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, 
"numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/bigann-100M/hnswlib/M16" - }, - { - "name" : "hnswlib.M24", - "algo" : "hnswlib", - "build_param": {"M":24, "efConstruction":500, "numThreads":32}, - "file" : "index/bigann-100M/hnswlib/M24", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/bigann-100M/hnswlib/M24" - }, - { - "name" : "hnswlib.M36", - "algo" : "hnswlib", - "build_param": {"M":36, "efConstruction":500, "numThreads":32}, - "file" : "index/bigann-100M/hnswlib/M36", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/bigann-100M/hnswlib/M36" - }, - - - { - "name" : "raft_ivf_flat.nlist100K", - "algo" : "raft_ivf_flat", - "dataset_memtype": "host", - "build_param": { - "nlist" : 100000, - "niter" : 25, - "ratio" : 5 - }, - "file" : "index/bigann-100M/raft_ivf_flat/nlist100K", - "search_params" : [ - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ], - "search_result_file" : "result/bigann-100M/raft_ivf_flat/nlist100K" - }, - - { - "name" : "raft_cagra.dim32", - "algo" : "raft_cagra", - "dataset_memtype": "host", - "build_param": { - "index_dim" : 32 - }, - "file" : "index/bigann-100M/raft_cagra/dim32", - "search_params" : [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ], - "search_result_file" : "result/bigann-100M/raft_cagra/dim32" - }, - - - { - "name" : "raft_cagra.dim64", - "algo" : "raft_cagra", - "dataset_memtype": "host", - "build_param": { - "index_dim" : 64 - }, - "file" : "index/bigann-100M/raft_cagra/dim64", - "search_params" : [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ], - "search_result_file" : "result/bigann-100M/raft_cagra/dim64" - } - ] -} diff --git a/bench/ann/conf/deep-100M.json b/bench/ann/conf/deep-100M.json deleted file mode 100644 index f3776b566e..0000000000 --- a/bench/ann/conf/deep-100M.json +++ /dev/null @@ -1,859 +0,0 @@ -{ - "dataset" : { - "name" : "deep-100M", - "base_file" : "data/deep-1B/base.1B.fbin", - "subset_size" : 100000000, - "query_file" : "data/deep-1B/query.public.10K.fbin", - "distance" : "euclidean" - }, - - "search_basic_param" : { - "batch_size" : 10000, - "k" : 10, - "run_count" : 2 - }, - - "index" : [ - { - "name" : "hnswlib.M12", - "algo" : "hnswlib", - "build_param": {"M":12, "efConstruction":500, "numThreads":32}, - "file" : "index/deep-100M/hnswlib/M12", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/deep-100M/hnswlib/M12" - }, - { - 
"name" : "hnswlib.M16", - "algo" : "hnswlib", - "build_param": {"M":16, "efConstruction":500, "numThreads":32}, - "file" : "index/deep-100M/hnswlib/M16", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/deep-100M/hnswlib/M16" - }, - { - "name" : "hnswlib.M24", - "algo" : "hnswlib", - "build_param": {"M":24, "efConstruction":500, "numThreads":32}, - "file" : "index/deep-100M/hnswlib/M24", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/deep-100M/hnswlib/M24" - }, - { - "name" : "hnswlib.M36", - "algo" : "hnswlib", - "build_param": {"M":36, "efConstruction":500, "numThreads":32}, - "file" : "index/deep-100M/hnswlib/M36", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/deep-100M/hnswlib/M36" - }, - { - "name" : "faiss_ivf_flat.nlist50K", - "algo" : "faiss_gpu_ivf_flat", - "build_param": {"nlist":50000}, - "file" : "index/deep-100M/faiss_ivf_flat/nlist50K", - "search_params" : [ - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ], - "search_result_file" : "result/deep-100M/faiss_ivf_flat/nlist50K" - }, - { - "name" : "faiss_ivf_flat.nlist100K", - "algo" : "faiss_gpu_ivf_flat", - "build_param": {"nlist":100000}, - "file" : "index/deep-100M/faiss_ivf_flat/nlist100K", - "search_params" : [ - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ], - "search_result_file" : "result/deep-100M/faiss_ivf_flat/nlist100K" - }, - { - "name" : "faiss_ivf_flat.nlist200K", - "algo" : "faiss_gpu_ivf_flat", - "build_param": {"nlist":200000}, - "file" : "index/deep-100M/faiss_ivf_flat/nlist200K", - "search_params" : [ - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ], - "search_result_file" : "result/deep-100M/faiss_ivf_flat/nlist200K" - }, - - - { - "name" : "faiss_ivf_pq.M48-nlist16K", - "algo" : "faiss_gpu_ivf_pq", - "build_param": {"nlist":16384, "M":48}, - "file" : "index/deep-100M/faiss_ivf_pq/M48-nlist16K", - "search_params" : [ - {"nprobe":10}, - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500} - ], - "search_result_file" : "result/deep-100M/faiss_ivf_pq/M48-nlist16K" - }, - { - "name" : "faiss_ivf_pq.M48-nlist50K", - "algo" : "faiss_gpu_ivf_pq", - "build_param": {"nlist":50000, "M":48}, - "file" : "index/deep-100M/faiss_ivf_pq/M48-nlist50K", - "search_params" : [ - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - 
{"nprobe":500}, - {"nprobe":1000} - ], - "search_result_file" : "result/deep-100M/faiss_ivf_pq/M48-nlist50K" - }, - { - "name" : "faiss_ivf_pq.M48-nlist100K", - "algo" : "faiss_gpu_ivf_pq", - "build_param": {"nlist":100000, "M":48}, - "file" : "index/deep-100M/faiss_ivf_pq/M48-nlist100K", - "search_params" : [ - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ], - "search_result_file" : "result/deep-100M/faiss_ivf_pq/M48-nlist100K" - }, - - - { - "name" : "raft_ivf_flat.nlist10K", - "algo" : "raft_ivf_flat", - "dataset_memtype": "host", - "build_param": { - "nlist" : 10000, - "niter" : 25, - "ratio" : 5 - }, - "file" : "index/deep-100M/raft_ivf_flat/nlist10K", - "search_params" : [ - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ], - "search_result_file" : "result/deep-100M/raft_ivf_flat/nlist10K" - }, - { - "name" : "raft_ivf_flat.nlist100K", - "algo" : "raft_ivf_flat", - "dataset_memtype": "host", - "build_param": { - "nlist" : 100000, - "niter" : 25, - "ratio" : 5 - }, - "file" : "index/deep-100M/raft_ivf_flat/nlist100K", - "search_params" : [ - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ], - "search_result_file" : "result/deep-100M/raft_ivf_flat/nlist100K" - }, - - { - "name" : "raft_ivf_pq.nlist10K", - "algo" : "raft_ivf_pq", - "dataset_memtype": "host", - "build_param": { - "nlist" : 10000, - "niter" : 25, - "ratio" : 5 - }, - "file" : "index/deep-100M/raft_ivf_pq/nlist10K", - "search_params" : [ - {"nprobe":3}, - {"nprobe":10}, - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ], - "search_result_file" : "result/deep-100M/raft_ivf_pq/nlist10K" - }, - { - "name" : "raft_ivf_pq.nlist10Kdim64", - "algo" : "raft_ivf_pq", - "dataset_memtype": "host", - "build_param": { - "nlist" : 10000, - "niter" : 25, - "ratio" : 5, - "pq_dim": 64 - }, - "file" : "index/deep-100M/raft_ivf_pq/nlist10Kdim64", - "search_params" : [ - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ], - "search_result_file" : "result/deep-100M/raft_ivf_pq/nlist10Kdim64" - }, - { - "name" : "raft_ivf_pq.nlist10Kdim32", - "algo" : "raft_ivf_pq", - "dataset_memtype": "host", - "build_param": { - "nlist" : 10000, - "niter" : 25, - "ratio" : 5, - "pq_dim": 32 - }, - "file" : "index/deep-100M/raft_ivf_pq/nlist10Kdim32", - "search_params" : [ - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ], - "search_result_file" : "result/deep-100M/raft_ivf_pq/nlist10Kdim32" - }, - { - "name" : "raft_ivf_pq.nlist100K", - "algo" : "raft_ivf_pq", - "dataset_memtype": "host", - "build_param": { - "nlist" : 100000, - "niter" : 25, - "ratio" : 5 - }, - "file" : "index/deep-100M/raft_ivf_pq/nlist100K", - "search_params" : [ - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ], - "search_result_file" : "result/deep-100M/raft_ivf_pq/nlist100K" - }, - 
{ - "name" : "raft_ivf_pq.nlist100Kdim64", - "algo" : "raft_ivf_pq", - "dataset_memtype": "host", - "build_param": { - "nlist" : 100000, - "niter" : 25, - "ratio" : 5, - "pq_dim": 64 - }, - "file" : "index/deep-100M/raft_ivf_pq/nlist100Kdim64", - "search_params" : [ - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ], - "search_result_file" : "result/deep-100M/raft_ivf_pq/nlist100Kdim64" - }, - { - "name" : "raft_ivf_pq.nlist100Kdim32", - "algo" : "raft_ivf_pq", - "dataset_memtype": "host", - "build_param": { - "nlist" : 100000, - "niter" : 25, - "ratio" : 5, - "pq_dim": 32 - }, - "file" : "index/deep-100M/raft_ivf_pq/nlist100Kdim32", - "search_params" : [ - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} - ], - "search_result_file" : "result/deep-100M/raft_ivf_pq/nlist100Kdim32" - }, - - { - "name" : "raft_cagra.dim32", - "algo" : "raft_cagra", - "dataset_memtype": "host", - "build_param": { - "index_dim": 32, - "intermediate_graph_degree": 48 - }, - "file": "index/deep-100M/raft_cagra/dim32", - "search_params" : [ - { - "itopk": 32, - "search_width": 1, - "max_iterations": 0, - "algo": "single_cta" - }, - { - "itopk": 32, - "search_width": 1, - "max_iterations": 32, - "algo": "single_cta" - }, - { - "itopk": 64, - "search_width": 4, - "max_iterations": 16, - "algo": "single_cta" - }, - { - "itopk": 64, - "search_width": 1, - "max_iterations": 64, - "algo": "single_cta" - }, - { - "itopk": 96, - "search_width": 2, - "max_iterations": 48, - "algo": "single_cta" - }, - { - "itopk": 128, - "search_width": 8, - "max_iterations": 16, - "algo": "single_cta" - }, - { - "itopk": 128, - "search_width": 2, - "max_iterations": 64, - "algo": "single_cta" - }, - { - "itopk": 192, - "search_width": 8, - "max_iterations": 24, - "algo": "single_cta" - }, - { - "itopk": 192, - "search_width": 2, - "max_iterations": 96, - "algo": "single_cta" - }, - { - "itopk": 256, - "search_width": 8, - "max_iterations": 32, - "algo": "single_cta" - }, - { - "itopk": 384, - "search_width": 8, - "max_iterations": 48, - "algo": "single_cta" - }, - { - "itopk": 512, - "search_width": 8, - "max_iterations": 64, - "algo": "single_cta" - }, - - { - "itopk": 32, - "search_width": 1, - "max_iterations": 0, - "algo": "multi_cta" - }, - { - "itopk": 32, - "search_width": 1, - "max_iterations": 32, - "algo": "multi_cta" - }, - { - "itopk": 64, - "search_width": 4, - "max_iterations": 16, - "algo": "multi_cta" - }, - { - "itopk": 64, - "search_width": 1, - "max_iterations": 64, - "algo": "multi_cta" - }, - { - "itopk": 96, - "search_width": 2, - "max_iterations": 48, - "algo": "multi_cta" - }, - { - "itopk": 128, - "search_width": 8, - "max_iterations": 16, - "algo": "multi_cta" - }, - { - "itopk": 128, - "search_width": 2, - "max_iterations": 64, - "algo": "multi_cta" - }, - { - "itopk": 192, - "search_width": 8, - "max_iterations": 24, - "algo": "multi_cta" - }, - { - "itopk": 192, - "search_width": 2, - "max_iterations": 96, - "algo": "multi_cta" - }, - { - "itopk": 256, - "search_width": 8, - "max_iterations": 32, - "algo": "multi_cta" - }, - { - "itopk": 384, - "search_width": 8, - "max_iterations": 48, - "algo": "multi_cta" - }, - { - "itopk": 512, - "search_width": 8, - "max_iterations": 64, - "algo": "multi_cta" - }, - - { - "itopk": 32, - "search_width": 1, - "max_iterations": 0, 
- "algo": "multi_kernel" - }, - { - "itopk": 32, - "search_width": 1, - "max_iterations": 32, - "algo": "multi_kernel" - }, - { - "itopk": 64, - "search_width": 4, - "max_iterations": 16, - "algo": "multi_kernel" - }, - { - "itopk": 64, - "search_width": 1, - "max_iterations": 64, - "algo": "multi_kernel" - }, - { - "itopk": 96, - "search_width": 2, - "max_iterations": 48, - "algo": "multi_kernel" - }, - { - "itopk": 128, - "search_width": 8, - "max_iterations": 16, - "algo": "multi_kernel" - }, - { - "itopk": 128, - "search_width": 2, - "max_iterations": 64, - "algo": "multi_kernel" - }, - { - "itopk": 192, - "search_width": 8, - "max_iterations": 24, - "algo": "multi_kernel" - }, - { - "itopk": 192, - "search_width": 2, - "max_iterations": 96, - "algo": "multi_kernel" - }, - { - "itopk": 256, - "search_width": 8, - "max_iterations": 32, - "algo": "multi_kernel" - }, - { - "itopk": 384, - "search_width": 8, - "max_iterations": 48, - "algo": "multi_kernel" - }, - { - "itopk": 512, - "search_width": 8, - "max_iterations": 64, - "algo": "multi_kernel" - } - ], - "search_result_file": "result/deep-100M/raft_cagra/dim32" - }, - { - "name" : "raft_cagra.dim64", - "algo" : "raft_cagra", - "dataset_memtype": "host", - "build_param": { - "index_dim": 64 - }, - "file": "index/deep-100M/raft_cagra/dim64", - "search_params" : [ - { - "itopk": 32, - "search_width": 1, - "max_iterations": 0, - "algo": "single_cta" - }, - { - "itopk": 32, - "search_width": 1, - "max_iterations": 32, - "algo": "single_cta" - }, - { - "itopk": 64, - "search_width": 4, - "max_iterations": 16, - "algo": "single_cta" - }, - { - "itopk": 64, - "search_width": 1, - "max_iterations": 64, - "algo": "single_cta" - }, - { - "itopk": 96, - "search_width": 2, - "max_iterations": 48, - "algo": "single_cta" - }, - { - "itopk": 128, - "search_width": 8, - "max_iterations": 16, - "algo": "single_cta" - }, - { - "itopk": 128, - "search_width": 2, - "max_iterations": 64, - "algo": "single_cta" - }, - { - "itopk": 192, - "search_width": 8, - "max_iterations": 24, - "algo": "single_cta" - }, - { - "itopk": 192, - "search_width": 2, - "max_iterations": 96, - "algo": "single_cta" - }, - { - "itopk": 256, - "search_width": 8, - "max_iterations": 32, - "algo": "single_cta" - }, - { - "itopk": 384, - "search_width": 8, - "max_iterations": 48, - "algo": "single_cta" - }, - { - "itopk": 512, - "search_width": 8, - "max_iterations": 64, - "algo": "single_cta" - }, - - { - "itopk": 32, - "search_width": 1, - "max_iterations": 0, - "algo": "multi_cta" - }, - { - "itopk": 32, - "search_width": 1, - "max_iterations": 32, - "algo": "multi_cta" - }, - { - "itopk": 64, - "search_width": 4, - "max_iterations": 16, - "algo": "multi_cta" - }, - { - "itopk": 64, - "search_width": 1, - "max_iterations": 64, - "algo": "multi_cta" - }, - { - "itopk": 96, - "search_width": 2, - "max_iterations": 48, - "algo": "multi_cta" - }, - { - "itopk": 128, - "search_width": 8, - "max_iterations": 16, - "algo": "multi_cta" - }, - { - "itopk": 128, - "search_width": 2, - "max_iterations": 64, - "algo": "multi_cta" - }, - { - "itopk": 192, - "search_width": 8, - "max_iterations": 24, - "algo": "multi_cta" - }, - { - "itopk": 192, - "search_width": 2, - "max_iterations": 96, - "algo": "multi_cta" - }, - { - "itopk": 256, - "search_width": 8, - "max_iterations": 32, - "algo": "multi_cta" - }, - { - "itopk": 384, - "search_width": 8, - "max_iterations": 48, - "algo": "multi_cta" - }, - { - "itopk": 512, - "search_width": 8, - "max_iterations": 64, - "algo": "multi_cta" - }, - - { 
- "itopk": 32, - "search_width": 1, - "max_iterations": 0, - "algo": "multi_kernel" - }, - { - "itopk": 32, - "search_width": 1, - "max_iterations": 32, - "algo": "multi_kernel" - }, - { - "itopk": 64, - "search_width": 4, - "max_iterations": 16, - "algo": "multi_kernel" - }, - { - "itopk": 64, - "search_width": 1, - "max_iterations": 64, - "algo": "multi_kernel" - }, - { - "itopk": 96, - "search_width": 2, - "max_iterations": 48, - "algo": "multi_kernel" - }, - { - "itopk": 128, - "search_width": 8, - "max_iterations": 16, - "algo": "multi_kernel" - }, - { - "itopk": 128, - "search_width": 2, - "max_iterations": 64, - "algo": "multi_kernel" - }, - { - "itopk": 192, - "search_width": 8, - "max_iterations": 24, - "algo": "multi_kernel" - }, - { - "itopk": 192, - "search_width": 2, - "max_iterations": 96, - "algo": "multi_kernel" - }, - { - "itopk": 256, - "search_width": 8, - "max_iterations": 32, - "algo": "multi_kernel" - }, - { - "itopk": 384, - "search_width": 8, - "max_iterations": 48, - "algo": "multi_kernel" - }, - { - "itopk": 512, - "search_width": 8, - "max_iterations": 64, - "algo": "multi_kernel" - } - ], - "search_result_file": "result/deep-100M/raft_cagra/dim64" - } - ] -} diff --git a/bench/ann/conf/deep-1B.json b/bench/ann/conf/deep-1B.json deleted file mode 100644 index 50d1b87602..0000000000 --- a/bench/ann/conf/deep-1B.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "dataset" : { - "name" : "deep-1B", - "base_file" : "data/deep-1B/base.1B.fbin", - "query_file" : "data/deep-1B/query.public.10K.fbin", - // although distance should be "euclidean", faiss becomes much slower for that - "distance" : "inner_product" - }, - - "search_basic_param" : { - "batch_size" : 10000, - "k" : 10, - "run_count" : 2 - }, - - "index" : [ - { - "name" : "faiss_ivf_pq.M48-nlist50K", - "algo" : "faiss_gpu_ivf_pq", - "build_param": {"nlist":50000, "M":48}, - "file" : "index/deep-1B/faiss_ivf_pq/M48-nlist50K", - "search_params" : [ - {"nprobe":1}, - {"nprobe":5}, - {"nprobe":10}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000}, - {"nprobe":2000} - ], - "search_result_file" : "result/deep-1B/faiss_ivf_pq/M48-nlist50K" - }, - - - ] -} diff --git a/bench/ann/conf/sift-128-euclidean.json b/bench/ann/conf/sift-128-euclidean.json deleted file mode 100644 index 2f9956ed3d..0000000000 --- a/bench/ann/conf/sift-128-euclidean.json +++ /dev/null @@ -1,1366 +0,0 @@ -{ - "dataset": { - "name": "sift-128-euclidean", - "base_file": "data/sift-128-euclidean/base.fbin", - "query_file": "data/sift-128-euclidean/query.fbin", - "distance": "euclidean" - }, - "search_basic_param": { - "batch_size": 5000, - "k": 10, - "run_count": 3 - }, - "index": [ - { - "name" : "hnswlib.M12", - "algo" : "hnswlib", - "build_param": {"M":12, "efConstruction":500, "numThreads":32}, - "file" : "index/sift-128-euclidean/hnswlib/M12", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/sift-128-euclidean/hnswlib/M12" - }, - { - "name" : "hnswlib.M16", - "algo" : "hnswlib", - "build_param": {"M":16, "efConstruction":500, "numThreads":32}, - "file" : "index/sift-128-euclidean/hnswlib/M16", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, 
"numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/sift-128-euclidean/hnswlib/M16" - }, - { - "name" : "hnswlib.M24", - "algo" : "hnswlib", - "build_param": {"M":24, "efConstruction":500, "numThreads":32}, - "file" : "index/sift-128-euclidean/hnswlib/M24", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/sift-128-euclidean/hnswlib/M24" - }, - { - "name" : "hnswlib.M36", - "algo" : "hnswlib", - "build_param": {"M":36, "efConstruction":500, "numThreads":32}, - "file" : "index/sift-128-euclidean/hnswlib/M36", - "search_params" : [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/sift-128-euclidean/hnswlib/M36" - }, - - - - - { - "name": "raft_bfknn", - "algo": "raft_bfknn", - "dataset_memtype": "device", - "build_param": {}, - "file": "index/sift-128-euclidean/raft_bfknn/bfknn", - "search_params": [ - { - "probe": 1 - } - ], - "search_result_file": "result/sift-128-euclidean/raft_bfknn/bfknn" - }, - { - "name": "faiss_ivf_flat.nlist1024", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 1024 - }, - "file": "index/sift-128-euclidean/faiss_ivf_flat/nlist1024", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_flat/nlist1024" - }, - { - "name": "faiss_ivf_flat.nlist2048", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 2048 - }, - "file": "index/sift-128-euclidean/faiss_ivf_flat/nlist2048", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_flat/nlist2048" - }, - { - "name": "faiss_ivf_flat.nlist4096", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 4096 - }, - "file": "index/sift-128-euclidean/faiss_ivf_flat/nlist4096", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_flat/nlist4096" - }, - { - "name": "faiss_ivf_flat.nlist8192", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 8192 - }, - "file": "index/sift-128-euclidean/faiss_ivf_flat/nlist8192", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": 
"result/sift-128-euclidean/faiss_ivf_flat/nlist8192" - }, - { - "name": "faiss_ivf_flat.nlist16384", - "algo": "faiss_gpu_ivf_flat", - "build_param": { - "nlist": 16384 - }, - "file": "index/sift-128-euclidean/faiss_ivf_flat/nlist16384", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_flat/nlist16384" - }, - { - "name": "faiss_ivf_pq.M64-nlist1024", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": true - }, - "file": "index/sift-128-euclidean/faiss_ivf_pq/M64-nlist1024", - "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_pq/M64-nlist1024" - }, - { - "name": "faiss_ivf_pq.M64-nlist1024.noprecomp", - "algo": "faiss_gpu_ivf_pq", - "build_param": { - "nlist": 1024, - "M": 64, - "useFloat16": true, - "usePrecomputed": false - }, - "file": "index/sift-128-euclidean/faiss_ivf_pq/M64-nlist1024.noprecomp", - "search_params": [ - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_pq/M64-nlist1024" - }, - { - "name": "faiss_ivf_sq.nlist1024-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 1024, - "quantizer_type": "fp16" - }, - "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist1024-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist1024-fp16" - }, - { - "name": "faiss_ivf_sq.nlist2048-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 2048, - "quantizer_type": "fp16" - }, - "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist2048-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist2048-fp16" - }, - { - "name": "faiss_ivf_sq.nlist4096-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 4096, - "quantizer_type": "fp16" - }, - "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist4096-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist4096-fp16" - }, - { - "name": "faiss_ivf_sq.nlist8192-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 8192, - "quantizer_type": "fp16" - }, - "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist8192-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - 
"search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist8192-fp16" - }, - { - "name": "faiss_ivf_sq.nlist16384-fp16", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 16384, - "quantizer_type": "fp16" - }, - "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist16384-fp16", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist16384-fp16" - }, - { - "name": "faiss_ivf_sq.nlist1024-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 1024, - "quantizer_type": "int8" - }, - "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist1024-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist1024-int8" - }, - { - "name": "faiss_ivf_sq.nlist2048-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 2048, - "quantizer_type": "int8" - }, - "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist2048-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist2048-int8" - }, - { - "name": "faiss_ivf_sq.nlist4096-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 4096, - "quantizer_type": "int8" - }, - "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist4096-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist4096-int8" - }, - { - "name": "faiss_ivf_sq.nlist8192-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 8192, - "quantizer_type": "int8" - }, - "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist8192-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist8192-int8" - }, - { - "name": "faiss_ivf_sq.nlist16384-int8", - "algo": "faiss_gpu_ivf_sq", - "build_param": { - "nlist": 16384, - "quantizer_type": "int8" - }, - "file": "index/sift-128-euclidean/faiss_ivf_sq/nlist16384-int8", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/sift-128-euclidean/faiss_ivf_sq/nlist16384-int8" - }, - { - "name": "faiss_flat", - "algo": "faiss_gpu_flat", - "build_param": {}, - "file": "index/sift-128-euclidean/faiss_flat/flat", - "search_params": [ - {} - ], - "search_result_file": "result/sift-128-euclidean/faiss_flat/flat" - }, - - { - "name": "raft_ivf_pq.dimpq128-cluster1024", - "algo": "raft_ivf_pq", - 
"dataset_memtype": "device", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", - "algo": "raft_ivf_pq", - "dataset_memtype": "device", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-float", - "search_params": [ - { - "k": 10, - "numProbes": 1, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 5, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-float" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", - "algo": "raft_ivf_pq", - "dataset_memtype": "device", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-half", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "dataset_memtype": "device", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-fp8", - "search_params": [ - { - 
"k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "dataset_memtype": "device", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", - "algo": "raft_ivf_pq", - "dataset_memtype": "device", - "build_param": { - "nlist": 1024, - "pq_dim": 64, - "ratio": 1, - "niter": 25 - }, - "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-half", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "half" - } - ], - "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq64-cluster1024-float-half" - }, - { - "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "dataset_memtype": "device", - "build_param": { - "nlist": 1024, - "pq_dim": 32, - "ratio": 1, - "niter": 25 - }, - "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq32-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - 
"internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq32-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", - "algo": "raft_ivf_pq", - "dataset_memtype": "device", - "build_param": { - "nlist": 1024, - "pq_dim": 16, - "ratio": 1, - "niter": 25 - }, - "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq16-cluster1024-float-fp8", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "fp8" - } - ], - "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq16-cluster1024-float-fp8" - }, - { - "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", - "algo": "raft_ivf_pq", - "dataset_memtype": "device", - "build_param": { - "nlist": 1024, - "pq_dim": 128, - "ratio": 1, - "niter": 25 - }, - "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024-half-float", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "half", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq128-cluster1024-half-float" - }, - { - "name": "raft_ivf_pq.dimpq512-cluster1024-float-float", - "algo": "raft_ivf_pq", - "dataset_memtype": "device", - "build_param": { - "nlist": 1024, - "pq_dim": 512, - "ratio": 1, - "niter": 25 - }, - "file": "index/sift-128-euclidean/raft_ivf_pq/dimpq512-cluster1024-float-float", - "search_params": [ - { - "k": 10, - "numProbes": 10, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 50, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 100, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 200, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 500, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - }, - { - "k": 10, - "numProbes": 1024, - "internalDistanceDtype": "float", - "smemLutDtype": "float" - } - ], - "search_result_file": "result/sift-128-euclidean/raft_ivf_pq/dimpq512-cluster1024-float-float" - }, - { - "name": "raft_ivf_flat.nlist1024", - "algo": "raft_ivf_flat", - "dataset_memtype": "device", - "build_param": { - "nlist": 1024, - "ratio": 1, - "niter": 25 - }, - "file": "index/sift-128-euclidean/raft_ivf_flat/nlist1024", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, 
- { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - } - ], - "search_result_file": "result/sift-128-euclidean/raft_ivf_flat/nlist1024" - }, - { - "name": "raft_ivf_flat.nlist16384", - "algo": "raft_ivf_flat", - "dataset_memtype": "device", - "build_param": { - "nlist": 16384, - "ratio": 2, - "niter": 20 - }, - "file": "index/sift-128-euclidean/raft_ivf_flat/nlist16384", - "search_params": [ - { - "nprobe": 1 - }, - { - "nprobe": 5 - }, - { - "nprobe": 10 - }, - { - "nprobe": 50 - }, - { - "nprobe": 100 - }, - { - "nprobe": 200 - }, - { - "nprobe": 500 - }, - { - "nprobe": 1000 - }, - { - "nprobe": 2000 - } - ], - "search_result_file": "result/sift-128-euclidean/raft_ivf_flat/nlist16384" - }, - - { - "name" : "raft_cagra.dim32", - "algo" : "raft_cagra", - "dataset_memtype": "device", - "build_param": { - "index_dim" : 32 - }, - "file" : "index/sift-128-euclidean/raft_cagra/dim32", - "search_params" : [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ], - "search_result_file" : "result/sift-128-euclidean/raft_cagra/dim32" - }, - - { - "name" : "raft_cagra.dim64", - "algo" : "raft_cagra", - "dataset_memtype": "device", - "build_param": { - "index_dim" : 64 - }, - "file" : "index/sift-128-euclidean/raft_cagra/dim64", - "search_params" : [ - {"itopk": 32}, - {"itopk": 64}, - {"itopk": 128} - ], - "search_result_file" : "result/sift-128-euclidean/raft_cagra/dim64" - } - ] -} diff --git a/bench/ann/data_export.py b/bench/ann/data_export.py deleted file mode 100644 index 9410cfe773..0000000000 --- a/bench/ann/data_export.py +++ /dev/null @@ -1,64 +0,0 @@ -# -# Copyright (c) 2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import argparse -import os -import subprocess - - -def export_results(output_filepath, recompute, groundtruth_filepath, - result_filepath): - print(f"Writing output file to: {output_filepath}") - ann_bench_scripts_dir = os.path.join(os.getenv("RAFT_HOME"), - "cpp/bench/ann/scripts") - ann_bench_scripts_path = os.path.join(ann_bench_scripts_dir, - "eval.pl") - if recompute: - p = subprocess.Popen([ann_bench_scripts_path, "-f", "-o", output_filepath, - groundtruth_filepath, result_filepath]) - else: - p = subprocess.Popen([ann_bench_scripts_path, "-o", output_filepath, - groundtruth_filepath, result_filepath]) - p.wait() - - -def main(): - parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument("--output", help="Path to the CSV output file", - required=True) - parser.add_argument("--recompute", action="store_true", - help="Recompute metrics") - parser.add_argument("--dataset", - help="Name of the dataset to export results for", - default="glove-100-inner") - parser.add_argument( - "--dataset-path", - help="path to dataset folder", - default=os.path.join(os.getenv("RAFT_HOME"), - "bench", "ann", "data") - ) - - args = parser.parse_args() - - result_filepath = os.path.join(args.dataset_path, args.dataset, "result") - groundtruth_filepath = os.path.join(args.dataset_path, args.dataset, - "groundtruth.neighbors.ibin") - export_results(args.output, args.recompute, groundtruth_filepath, - result_filepath) - - -if __name__ == "__main__": - main() diff --git a/build.sh b/build.sh index 8706f1b138..3ae6c338db 100755 --- a/build.sh +++ b/build.sh @@ -39,6 +39,7 @@ HELP="$0 [ ...] [ ...] [--cmake-args=\"\"] [--cache-tool==12.0.0 - cxx-compiler - cython>=3.0.0 -- dask-core>=2023.7.1 +- dask-core==2023.7.1 - dask-cuda==23.10.* -- dask>=2023.7.1 -- distributed>=2023.7.1 +- dask==2023.7.1 +- distributed==2023.7.1 - doxygen>=1.8.20 - gcc_linux-64=11.* - gmock>=1.13.0 diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml index 2ea685b529..e68feaad82 100644 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ b/conda/environments/all_cuda-120_arch-x86_64.yaml @@ -19,10 +19,10 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- dask-core>=2023.7.1 +- dask-core==2023.7.1 - dask-cuda==23.10.* -- dask>=2023.7.1 -- distributed>=2023.7.1 +- dask==2023.7.1 +- distributed==2023.7.1 - doxygen>=1.8.20 - gcc_linux-64=11.* - gmock>=1.13.0 diff --git a/conda/recipes/libraft/build_libraft.sh b/conda/recipes/libraft/build_libraft.sh index 7d4173e8bb..71e1533893 100644 --- a/conda/recipes/libraft/build_libraft.sh +++ b/conda/recipes/libraft/build_libraft.sh @@ -1,4 +1,4 @@ #!/usr/bin/env bash # Copyright (c) 2022-2023, NVIDIA CORPORATION. -./build.sh libraft --allgpuarch --compile-lib --build-metrics=compile_lib --incl-cache-stats --no-nvtx +./build.sh libraft -v --allgpuarch --compile-lib --build-metrics=compile_lib --incl-cache-stats --no-nvtx diff --git a/conda/recipes/libraft/build_libraft_headers.sh b/conda/recipes/libraft/build_libraft_headers.sh index cc3b840e43..330ac92ff3 100644 --- a/conda/recipes/libraft/build_libraft_headers.sh +++ b/conda/recipes/libraft/build_libraft_headers.sh @@ -1,4 +1,4 @@ #!/usr/bin/env bash # Copyright (c) 2022-2023, NVIDIA CORPORATION. 
-./build.sh libraft --allgpuarch --no-nvtx +./build.sh libraft -v --allgpuarch --no-nvtx diff --git a/conda/recipes/libraft/build_libraft_template.sh b/conda/recipes/libraft/build_libraft_template.sh index bd7719af76..974b0a5b58 100644 --- a/conda/recipes/libraft/build_libraft_template.sh +++ b/conda/recipes/libraft/build_libraft_template.sh @@ -2,4 +2,4 @@ # Copyright (c) 2022-2023, NVIDIA CORPORATION. # Just building template so we verify it uses libraft.so and fail if it doesn't build -./build.sh template +./build.sh template -v diff --git a/conda/recipes/libraft/build_libraft_tests.sh b/conda/recipes/libraft/build_libraft_tests.sh index 05a2b59eb0..08f0d33485 100644 --- a/conda/recipes/libraft/build_libraft_tests.sh +++ b/conda/recipes/libraft/build_libraft_tests.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash # Copyright (c) 2022-2023, NVIDIA CORPORATION. -./build.sh tests bench-prims --allgpuarch --no-nvtx --build-metrics=tests_bench_prims --incl-cache-stats +./build.sh tests bench-prims -v --allgpuarch --no-nvtx --build-metrics=tests_bench_prims --incl-cache-stats cmake --install cpp/build --component testing diff --git a/conda/recipes/libraft/meta.yaml b/conda/recipes/libraft/meta.yaml index 09ef7ae4ab..96bc6cc5dd 100644 --- a/conda/recipes/libraft/meta.yaml +++ b/conda/recipes/libraft/meta.yaml @@ -269,62 +269,3 @@ outputs: home: https://rapids.ai/ license: Apache-2.0 summary: libraft template - - name: libraft-ann-bench - version: {{ version }} - script: build_libraft_nn_bench.sh - build: - script_env: *script_env - number: {{ GIT_DESCRIBE_NUMBER }} - string: cuda{{ cuda_major }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} - ignore_run_exports_from: - {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} - {% endif %} - requirements: - build: - - {{ compiler('c') }} - - {{ compiler('cxx') }} - {% if cuda_major == "11" %} - - {{ compiler('cuda11') }} ={{ cuda_version }} - {% else %} - - {{ compiler('cuda') }} - {% endif %} - - cuda-version ={{ cuda_version }} - - cmake {{ cmake_version }} - - ninja - - sysroot_{{ target_platform }} {{ sysroot_version }} - host: - - {{ pin_subpackage('libraft', exact=True) }} - - cuda-version ={{ cuda_version }} - {% if cuda_major == "11" %} - - cuda-profiler-api {{ cuda11_cuda_profiler_api_run_version }} - - libcublas {{ cuda11_libcublas_host_version }} - - libcublas-dev {{ cuda11_libcublas_host_version }} - {% else %} - - cuda-profiler-api - - libcublas-dev - {% endif %} - - glog {{ glog_version }} - - nlohmann_json {{ nlohmann_json_version }} - # Temporarily ignore faiss benchmarks on CUDA 12 because packages do not exist yet - {% if cuda_major == "11" %} - - faiss-proc=*=cuda - - libfaiss {{ faiss_version }} - {% endif %} - run: - - {{ pin_subpackage('libraft', exact=True) }} - - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} - {% if cuda_major == "11" %} - - cudatoolkit - {% endif %} - - glog {{ glog_version }} - # Temporarily ignore faiss benchmarks on CUDA 12 because packages do not exist yet - {% if cuda_major == "11" %} - - faiss-proc=*=cuda - - libfaiss {{ faiss_version }} - {% endif %} - - h5py {{ h5py_version }} - about: - home: https://rapids.ai/ - license: Apache-2.0 - summary: libraft ann bench diff --git a/conda/recipes/libraft/build_libraft_nn_bench.sh b/conda/recipes/raft-ann-bench-cpu/build_raft_nn_bench_cpu.sh similarity index 55% rename from conda/recipes/libraft/build_libraft_nn_bench.sh rename to conda/recipes/raft-ann-bench-cpu/build_raft_nn_bench_cpu.sh index 00078792a1..b796b4d7d5 100644 
--- a/conda/recipes/libraft/build_libraft_nn_bench.sh +++ b/conda/recipes/raft-ann-bench-cpu/build_raft_nn_bench_cpu.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash # Copyright (c) 2023, NVIDIA CORPORATION. -./build.sh bench-ann --allgpuarch --no-nvtx --build-metrics=bench_ann --incl-cache-stats +./build.sh bench-ann --cpu-only --no-nvtx --build-metrics=bench_ann --incl-cache-stats cmake --install cpp/build --component ann_bench diff --git a/conda/recipes/raft-ann-bench-cpu/meta.yaml b/conda/recipes/raft-ann-bench-cpu/meta.yaml new file mode 100644 index 0000000000..c0450b9e8a --- /dev/null +++ b/conda/recipes/raft-ann-bench-cpu/meta.yaml @@ -0,0 +1,65 @@ +# Copyright (c) 2022-2023, NVIDIA CORPORATION. + +# Usage: +# conda build . -c conda-forge -c nvidia -c rapidsai +{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %} +{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} +{% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %} +{% set cuda_major = cuda_version.split('.')[0] %} +{% set cuda_spec = ">=" + cuda_major ~ ",<" + (cuda_major | int + 1) ~ ".0a0" %} # i.e. >=11,<12.0a0 +{% set date_string = environ['RAPIDS_DATE_STRING'] %} + +package: + name: raft-ann-bench-cpu + version: {{ version }} + script: build.sh + +source: + git_url: ../../.. + +build: + script_env: + - AWS_ACCESS_KEY_ID + - AWS_SECRET_ACCESS_KEY + - AWS_SESSION_TOKEN + - CMAKE_C_COMPILER_LAUNCHER + - CMAKE_CUDA_COMPILER_LAUNCHER + - CMAKE_CXX_COMPILER_LAUNCHER + - CMAKE_GENERATOR + - PARALLEL_LEVEL + - RAPIDS_ARTIFACTS_DIR + - SCCACHE_BUCKET + - SCCACHE_IDLE_TIMEOUT + - SCCACHE_REGION + - SCCACHE_S3_KEY_PREFIX=libraft-aarch64 # [aarch64] + - SCCACHE_S3_KEY_PREFIX=libraft-linux64 # [linux64] + - SCCACHE_S3_USE_SSL + number: {{ GIT_DESCRIBE_NUMBER }} + string: py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} + +requirements: + build: + - {{ compiler('c') }} + - {{ compiler('cxx') }} + - cmake {{ cmake_version }} + - ninja + - sysroot_{{ target_platform }} {{ sysroot_version }} + + host: + - glog {{ glog_version }} + - matplotlib + - nlohmann_json {{ nlohmann_json_version }} + - python + - pyyaml + + run: + - glog {{ glog_version }} + - h5py {{ h5py_version }} + - matplotlib + - python + - pyyaml + +about: + home: https://rapids.ai/ + license: Apache-2.0 + summary: libraft ann bench diff --git a/conda/recipes/raft-ann-bench/build.sh b/conda/recipes/raft-ann-bench/build.sh new file mode 100644 index 0000000000..9c411774b6 --- /dev/null +++ b/conda/recipes/raft-ann-bench/build.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash +# Copyright (c) 2023, NVIDIA CORPORATION. 
+ +./build.sh bench-ann -v --allgpuarch --no-nvtx --build-metrics=bench_ann --incl-cache-stats +cmake --install cpp/build --component ann_bench diff --git a/conda/recipes/raft-ann-bench/conda_build_config.yaml b/conda/recipes/raft-ann-bench/conda_build_config.yaml new file mode 100644 index 0000000000..d156f2609b --- /dev/null +++ b/conda/recipes/raft-ann-bench/conda_build_config.yaml @@ -0,0 +1,73 @@ +c_compiler_version: + - 11 + +cxx_compiler_version: + - 11 + +cuda_compiler: + - cuda-nvcc + +cuda11_compiler: + - nvcc + +sysroot_version: + - "2.17" + +cmake_version: + - ">=3.26.4" + +nccl_version: + - ">=2.9.9" + +gtest_version: + - ">=1.13.0" + +glog_version: + - ">=0.6.0" + +faiss_version: + - ">=1.7.1" + +h5py_version: + - ">=3.8.0" + +nlohmann_json_version: + - ">=3.11.2" + +# The CTK libraries below are missing from the conda-forge::cudatoolkit package +# for CUDA 11. The "*_host_*" version specifiers correspond to `11.8` packages +# and the "*_run_*" version specifiers correspond to `11.x` packages. + +cuda11_libcublas_host_version: + - "=11.11.3.6" + +cuda11_libcublas_run_version: + - ">=11.5.2.43,<12.0.0" + +cuda11_libcurand_host_version: + - "=10.3.0.86" + +cuda11_libcurand_run_version: + - ">=10.2.5.43,<10.3.1" + +cuda11_libcusolver_host_version: + - "=11.4.1.48" + +cuda11_libcusolver_run_version: + - ">=11.2.0.43,<11.4.2" + +cuda11_libcusparse_host_version: + - "=11.7.5.86" + +cuda11_libcusparse_run_version: + - ">=11.6.0.43,<12.0.0" + +# `cuda-profiler-api` only has `11.8.0` and `12.0.0` packages for all +# architectures. The "*_host_*" version specifiers correspond to `11.8` packages and the +# "*_run_*" version specifiers correspond to `11.x` packages. + +cuda11_cuda_profiler_api_host_version: + - "=11.8.86" + +cuda11_cuda_profiler_api_run_version: + - ">=11.4.240,<12" diff --git a/conda/recipes/raft-ann-bench/meta.yaml b/conda/recipes/raft-ann-bench/meta.yaml new file mode 100644 index 0000000000..6e5580dad2 --- /dev/null +++ b/conda/recipes/raft-ann-bench/meta.yaml @@ -0,0 +1,96 @@ +# Copyright (c) 2022-2023, NVIDIA CORPORATION. + +# Usage: +# conda build . -c conda-forge -c nvidia -c rapidsai +{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %} +{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} +{% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %} +{% set cuda_major = cuda_version.split('.')[0] %} +{% set cuda_spec = ">=" + cuda_major ~ ",<" + (cuda_major | int + 1) ~ ".0a0" %} # i.e. >=11,<12.0a0 +{% set date_string = environ['RAPIDS_DATE_STRING'] %} + +package: + name: raft-ann-bench + version: {{ version }} + script: build.sh + +source: + git_url: ../../.. 
+ +build: + script_env: + - AWS_ACCESS_KEY_ID + - AWS_SECRET_ACCESS_KEY + - AWS_SESSION_TOKEN + - CMAKE_C_COMPILER_LAUNCHER + - CMAKE_CUDA_COMPILER_LAUNCHER + - CMAKE_CXX_COMPILER_LAUNCHER + - CMAKE_GENERATOR + - PARALLEL_LEVEL + - RAPIDS_ARTIFACTS_DIR + - SCCACHE_BUCKET + - SCCACHE_IDLE_TIMEOUT + - SCCACHE_REGION + - SCCACHE_S3_KEY_PREFIX=libraft-aarch64 # [aarch64] + - SCCACHE_S3_KEY_PREFIX=libraft-linux64 # [linux64] + - SCCACHE_S3_USE_SSL + number: {{ GIT_DESCRIBE_NUMBER }} + string: cuda{{ cuda_major }}_py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} + ignore_run_exports_from: + {% if cuda_major == "11" %} + - {{ compiler('cuda11') }} + {% endif %} + +requirements: + build: + - {{ compiler('c') }} + - {{ compiler('cxx') }} + {% if cuda_major == "11" %} + - {{ compiler('cuda11') }} ={{ cuda_version }} + {% else %} + - {{ compiler('cuda') }} + {% endif %} + - cuda-version ={{ cuda_version }} + - cmake {{ cmake_version }} + - ninja + - sysroot_{{ target_platform }} {{ sysroot_version }} + + host: + - python + - libraft {{ version }} + - cuda-version ={{ cuda_version }} + {% if cuda_major == "11" %} + - cuda-profiler-api {{ cuda11_cuda_profiler_api_run_version }} + - libcublas {{ cuda11_libcublas_host_version }} + - libcublas-dev {{ cuda11_libcublas_host_version }} + {% else %} + - cuda-profiler-api + - libcublas-dev + {% endif %} + - glog {{ glog_version }} + - nlohmann_json {{ nlohmann_json_version }} + # Temporarily ignore faiss benchmarks on CUDA 12 because packages do not exist yet + {% if cuda_major == "11" %} + - faiss-proc=*=cuda + - libfaiss {{ faiss_version }} + {% endif %} + + run: + - python + - libraft {{ version }} + - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} + {% if cuda_major == "11" %} + - cudatoolkit + {% endif %} + - glog {{ glog_version }} + # Temporarily ignore faiss benchmarks on CUDA 12 because packages do not exist yet + {% if cuda_major == "11" %} + - faiss-proc=*=cuda + - libfaiss {{ faiss_version }} + {% endif %} + - h5py {{ h5py_version }} + +about: + home: https://rapids.ai/ + license: Apache-2.0 + summary: RAFT ANN GPU and CPU benchmarks diff --git a/conda/recipes/raft-dask/meta.yaml b/conda/recipes/raft-dask/meta.yaml index c9caa4dd9b..cf1f8488bc 100644 --- a/conda/recipes/raft-dask/meta.yaml +++ b/conda/recipes/raft-dask/meta.yaml @@ -60,10 +60,10 @@ requirements: - cudatoolkit {% endif %} - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} - - dask >=2023.7.1 - - dask-core >=2023.7.1 + - dask ==2023.7.1 + - dask-core ==2023.7.1 - dask-cuda ={{ minor_version }} - - distributed >=2023.7.1 + - distributed ==2023.7.1 - joblib >=0.11 - nccl >=2.9.9 - pylibraft {{ version }} diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index ae2660509a..ecb74ad306 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -17,18 +17,26 @@ cmake_minimum_required(VERSION 3.26.4 FATAL_ERROR) include(../fetch_rapids.cmake) include(rapids-cmake) include(rapids-cpm) -include(rapids-cuda) include(rapids-export) include(rapids-find) -rapids_cuda_init_architectures(RAFT) +option(CPU_ONLY "Build CPU only components. 
Applies to RAFT ANN benchmarks currently" OFF)
+
+if(NOT CPU_ONLY)
+  include(rapids-cuda)
+  rapids_cuda_init_architectures(RAFT)
+endif()
 
 project(
   RAFT
   VERSION ${RAFT_VERSION}
-  LANGUAGES CXX CUDA
+  LANGUAGES CXX
 )
 
+if(NOT CPU_ONLY)
+  enable_language(CUDA)
+endif()
+
 # Write the version header
 rapids_cmake_write_version_file(include/raft/version_config.hpp)
 
@@ -60,9 +68,10 @@ option(DISABLE_OPENMP "Disable OpenMP" OFF)
 option(RAFT_NVTX "Enable nvtx markers" OFF)
 
 set(RAFT_COMPILE_LIBRARY_DEFAULT OFF)
-if(BUILD_TESTS
-   OR BUILD_PRIMS_BENCH
-   OR BUILD_ANN_BENCH
+if((BUILD_TESTS
+    OR BUILD_PRIMS_BENCH
+    OR BUILD_ANN_BENCH)
+   AND NOT CPU_ONLY
 )
   set(RAFT_COMPILE_LIBRARY_DEFAULT ON)
 endif()
@@ -70,6 +79,11 @@ option(RAFT_COMPILE_LIBRARY "Enable building raft shared library instantiations"
        ${RAFT_COMPILE_LIBRARY_DEFAULT}
 )
 
+if(CPU_ONLY)
+  set(BUILD_SHARED_LIBS OFF)
+  set(BUILD_TESTS OFF)
+endif()
+
 # Needed because GoogleBenchmark changes the state of FindThreads.cmake, causing subsequent runs to
 # have different values for the `Threads::Threads` target. Setting this flag ensures
 # `Threads::Threads` is the same value across all builds so that cache hits occur
@@ -82,6 +96,8 @@ include(CMakeDependentOption)
 message(VERBOSE "RAFT: Building optional components: ${raft_FIND_COMPONENTS}")
 message(VERBOSE "RAFT: Build RAFT unit-tests: ${BUILD_TESTS}")
 message(VERBOSE "RAFT: Building raft C++ benchmarks: ${BUILD_PRIMS_BENCH}")
+message(VERBOSE "RAFT: Building ANN benchmarks: ${BUILD_ANN_BENCH}")
+message(VERBOSE "RAFT: Build CPU only components: ${CPU_ONLY}")
 message(VERBOSE "RAFT: Enable detection of conda environment for dependencies: ${DETECT_CONDA_ENV}")
 message(VERBOSE "RAFT: Disable depreaction warnings " ${DISABLE_DEPRECATION_WARNINGS})
 message(VERBOSE "RAFT: Disable OpenMP: ${DISABLE_OPENMP}")
@@ -116,7 +132,7 @@ if(DETECT_CONDA_ENV)
 endif()
 
 # ##################################################################################################
-# * compiler options ---------------------------------------------------------
+# * compiler options ----------------------------------------------------------
 
 set(_ctk_static_suffix "")
 if(CUDA_STATIC_RUNTIME)
@@ -127,11 +143,7 @@ endif()
 rapids_cuda_init_runtime(USE_STATIC ${CUDA_STATIC_RUNTIME})
 
 if(NOT DISABLE_OPENMP)
-  rapids_find_package(
-    OpenMP REQUIRED
-    BUILD_EXPORT_SET raft-exports
-    INSTALL_EXPORT_SET raft-exports
-  )
+  find_package(OpenMP)
   if(OPENMP_FOUND)
     message(VERBOSE "RAFT: OpenMP found in ${OpenMP_CXX_INCLUDE_DIRS}")
   endif()
@@ -154,19 +166,21 @@ include(cmake/modules/ConfigureCUDA.cmake)
 # add third party dependencies using CPM
 rapids_cpm_init()
 
-# thrust before rmm/cuco so we get the right version of thrust/cub
-include(cmake/thirdparty/get_thrust.cmake)
-include(cmake/thirdparty/get_rmm.cmake)
-include(cmake/thirdparty/get_cutlass.cmake)
+if(NOT CPU_ONLY)
+  # thrust before rmm/cuco so we get the right version of thrust/cub
+  include(cmake/thirdparty/get_thrust.cmake)
+  include(cmake/thirdparty/get_rmm.cmake)
+  include(cmake/thirdparty/get_cutlass.cmake)
 
-include(${rapids-cmake-dir}/cpm/cuco.cmake)
-rapids_cpm_cuco(BUILD_EXPORT_SET raft-exports INSTALL_EXPORT_SET raft-exports)
+  include(${rapids-cmake-dir}/cpm/cuco.cmake)
+  rapids_cpm_cuco(BUILD_EXPORT_SET raft-exports INSTALL_EXPORT_SET raft-exports)
+endif()
 
 if(BUILD_TESTS)
   include(cmake/thirdparty/get_gtest.cmake)
 endif()
 
-if(BUILD_PRIMS_BENCH)
+if(BUILD_PRIMS_BENCH OR BUILD_ANN_BENCH)
   include(${rapids-cmake-dir}/cpm/gbench.cmake)
   rapids_cpm_gbench()
 endif()
@@ -180,8 +194,10 @@
target_include_directories( raft INTERFACE "$" "$" ) -# Keep RAFT as lightweight as possible. Only CUDA libs and rmm should be used in global target. -target_link_libraries(raft INTERFACE rmm::rmm cuco::cuco nvidia::cutlass::cutlass raft::Thrust) +if(NOT CPU_ONLY) + # Keep RAFT as lightweight as possible. Only CUDA libs and rmm should be used in global target. + target_link_libraries(raft INTERFACE rmm::rmm cuco::cuco nvidia::cutlass::cutlass raft::Thrust) +endif() target_compile_features(raft INTERFACE cxx_std_17 $) target_compile_options( diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index 6977d77684..2df845636e 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -13,7 +13,7 @@ # ============================================================================= # ################################################################################################## -# * compiler function ----------------------------------------------------------------------------- +# * benchmark options- ----------------------------------------------------------------------------- option(RAFT_ANN_BENCH_USE_FAISS_BFKNN "Include faiss' brute-force knn algorithm in benchmark" ON) option(RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT "Include faiss' ivf flat algorithm in benchmark" ON) @@ -23,9 +23,25 @@ option(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ "Include raft's ivf pq algorithm in benchm option(RAFT_ANN_BENCH_USE_RAFT_CAGRA "Include raft's CAGRA in benchmark" ON) option(RAFT_ANN_BENCH_USE_HNSWLIB "Include hnsw algorithm in benchmark" ON) option(RAFT_ANN_BENCH_USE_GGNN "Include ggnn algorithm in benchmark" ON) +option(RAFT_ANN_BENCH_SINGLE_EXE + "Make a single executable with benchmark as shared library modules" OFF +) + +# ################################################################################################## +# * Process options ---------------------------------------------------------- find_package(Threads REQUIRED) +if(CPU_ONLY) + set(RAFT_ANN_BENCH_USE_FAISS_BFKNN OFF) + set(RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT OFF) + set(RAFT_ANN_BENCH_USE_FAISS_IVF_PQ OFF) + set(RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT OFF) + set(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ OFF) + set(RAFT_ANN_BENCH_USE_RAFT_CAGRA OFF) + set(RAFT_ANN_BENCH_USE_GGNN OFF) +endif() + # Disable faiss benchmarks on CUDA 12 since faiss is not yet CUDA 12-enabled. 
# https://github.com/rapidsai/raft/issues/1627 if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.0.0) @@ -50,12 +66,25 @@ if(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ set(RAFT_ANN_BENCH_USE_RAFT ON) endif() +option(RAFT_ANN_BENCH_USE_MULTIGPU "Use multi-gpus (where possible) in benchmarks" OFF) + +message(VERBOSE "RAFT: Build ann-bench with FAISS_BFKNN: ${RAFT_ANN_BENCH_USE_FAISS_BFKNN}") +message(VERBOSE "RAFT: Build ann-bench with FAISS_IVF_FLAT: ${RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT}") +message(VERBOSE "RAFT: Build ann-bench with FAISS_IVF_PQ: ${RAFT_ANN_BENCH_USE_FAISS_IVF_PQ}") +message(VERBOSE "RAFT: Build ann-bench with RAFT_IVF_FLAT: ${RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT}") +message(VERBOSE "RAFT: Build ann-bench with RAFT_IVF_PQ: ${RAFT_ANN_BENCH_USE_RAFT_IVF_PQ}") +message(VERBOSE "RAFT: Build ann-bench with RAFT_CAGRA: ${RAFT_ANN_BENCH_USE_RAFT_CAGRA}") +message(VERBOSE "RAFT: Build ann-bench with HNSWLIB: ${RAFT_ANN_BENCH_USE_HNSWLIB}") +message(VERBOSE "RAFT: Build ann-bench with GGNN: ${RAFT_ANN_BENCH_USE_GGNN}") +message(VERBOSE "RAFT: Build ann-bench with MULTIGPU: ${RAFT_ANN_BENCH_USE_MULTIGPU}") + +# ################################################################################################## +# * Fetch requirements ------------------------------------------------------------- + if(RAFT_ANN_BENCH_USE_HNSWLIB) include(cmake/thirdparty/get_hnswlib.cmake) endif() -option(RAFT_ANN_BENCH_USE_MULTIGPU "Use multi-gpus (where possible) in benchmarks" OFF) - include(cmake/thirdparty/get_nlohmann_json.cmake) if(RAFT_ANN_BENCH_USE_GGNN) @@ -66,43 +95,60 @@ if(RAFT_ANN_BENCH_USE_FAISS) include(cmake/thirdparty/get_faiss.cmake) endif() +# ################################################################################################## +# * Configure tests function------------------------------------------------------------- + function(ConfigureAnnBench) set(oneValueArgs NAME) set(multiValueArgs PATH LINKS CXXFLAGS INCLUDES) + if(NOT CPU_ONLY) + set(GPU_BUILD ON) + endif() + cmake_parse_arguments( ConfigureAnnBench "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} ) set(BENCH_NAME ${ConfigureAnnBench_NAME}_ANN_BENCH) - add_executable( - ${BENCH_NAME} ${ConfigureAnnBench_PATH} bench/ann/src/common/conf.cpp - bench/ann/src/common/util.cpp - ) + if(RAFT_ANN_BENCH_SINGLE_EXE) + add_library(${BENCH_NAME} SHARED ${ConfigureAnnBench_PATH}) + string(TOLOWER ${BENCH_NAME} BENCH_LIB_NAME) + set_target_properties(${BENCH_NAME} PROPERTIES OUTPUT_NAME ${BENCH_LIB_NAME}) + add_dependencies(${BENCH_NAME} ANN_BENCH) + else() + add_executable(${BENCH_NAME} ${ConfigureAnnBench_PATH}) + target_compile_definitions(${BENCH_NAME} PRIVATE ANN_BENCH_BUILD_MAIN) + target_link_libraries(${BENCH_NAME} PRIVATE benchmark::benchmark) + endif() + target_link_libraries( ${BENCH_NAME} PRIVATE raft::raft nlohmann_json::nlohmann_json - $<$:NCCL::NCCL> + $<$:$<$:NCCL::NCCL>> ${ConfigureAnnBench_LINKS} Threads::Threads - ${RAFT_CTK_MATH_DEPENDENCIES} + $<$:${RAFT_CTK_MATH_DEPENDENCIES}> $ $ + -static-libgcc + -static-libstdc++ ) set_target_properties( ${BENCH_NAME} PROPERTIES # set target compile options - INSTALL_RPATH "\$ORIGIN/../../../lib" CXX_STANDARD 17 CXX_STANDARD_REQUIRED ON CUDA_STANDARD 17 CUDA_STANDARD_REQUIRED ON POSITION_INDEPENDENT_CODE ON INTERFACE_POSITION_INDEPENDENT_CODE ON + BUILD_RPATH "\$ORIGIN" + INSTALL_RPATH "\$ORIGIN" ) set(${ConfigureAnnBench_CXXFLAGS} ${RAFT_CXX_FLAGS} ${ConfigureAnnBench_CXXFLAGS}) @@ -134,6 +180,9 @@ function(ConfigureAnnBench) ) endfunction() +# 
################################################################################################## +# * Configure tests------------------------------------------------------------- + if(RAFT_ANN_BENCH_USE_HNSWLIB) ConfigureAnnBench( NAME HNSWLIB PATH bench/ann/src/hnswlib/hnswlib_benchmark.cpp INCLUDES @@ -200,3 +249,58 @@ if(RAFT_ANN_BENCH_USE_GGNN) ${CMAKE_CURRENT_BINARY_DIR}/_deps/ggnn-src/include LINKS glog::glog ) endif() + +# ################################################################################################## +# * Dynamically-loading ANN_BENCH executable ------------------------------------------------------- + +if(RAFT_ANN_BENCH_SINGLE_EXE) + add_executable(ANN_BENCH bench/ann/src/common/benchmark.cpp) + + # Build and link static version of the GBench to keep ANN_BENCH self-contained. + get_target_property(TMP_PROP benchmark::benchmark SOURCES) + add_library(benchmark_static STATIC ${TMP_PROP}) + get_target_property(TMP_PROP benchmark::benchmark INCLUDE_DIRECTORIES) + target_include_directories(benchmark_static PUBLIC ${TMP_PROP}) + get_target_property(TMP_PROP benchmark::benchmark LINK_LIBRARIES) + target_link_libraries(benchmark_static PUBLIC ${TMP_PROP}) + + target_include_directories(ANN_BENCH PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) + + target_link_libraries( + ANN_BENCH PRIVATE nlohmann_json::nlohmann_json benchmark_static dl -static-libgcc + -static-libstdc++ CUDA::nvtx3 + ) + set_target_properties( + ANN_BENCH + PROPERTIES # set target compile options + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON + POSITION_INDEPENDENT_CODE ON + INTERFACE_POSITION_INDEPENDENT_CODE ON + BUILD_RPATH "\$ORIGIN" + INSTALL_RPATH "\$ORIGIN" + ) + + # Disable NVTX when the nvtx3 headers are missing + set(_CMAKE_REQUIRED_INCLUDES_ORIG ${CMAKE_REQUIRED_INCLUDES}) + get_target_property(CMAKE_REQUIRED_INCLUDES ANN_BENCH INCLUDE_DIRECTORIES) + CHECK_INCLUDE_FILE_CXX(nvtx3/nvToolsExt.h NVTX3_HEADERS_FOUND) + set(CMAKE_REQUIRED_INCLUDES ${_CMAKE_REQUIRED_INCLUDES_ORIG}) + target_compile_definitions( + ANN_BENCH + PRIVATE + $<$:ANN_BENCH_LINK_CUDART="libcudart.so.${CUDAToolkit_VERSION_MAJOR}"> + $<$:ANN_BENCH_NVTX3_HEADERS_FOUND> + ) + + target_link_options(ANN_BENCH PRIVATE -export-dynamic) + + install( + TARGETS ANN_BENCH + COMPONENT ann_bench + DESTINATION bin/ann + EXCLUDE_FROM_ALL + ) +endif() diff --git a/cpp/bench/ann/scripts/eval.pl b/cpp/bench/ann/scripts/eval.pl deleted file mode 100755 index 81c5563d79..0000000000 --- a/cpp/bench/ann/scripts/eval.pl +++ /dev/null @@ -1,430 +0,0 @@ -#!/usr/bin/perl - -# ============================================================================= -# Copyright (c) 2020-2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. 
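A note on the nvtx3 detection in the ANN_BENCH target above: when the nvtx3/nvToolsExt.h header is not found, the ANN_BENCH_NVTX3_HEADERS_FOUND definition is simply omitted, so any NVTX annotation in the sources has to compile down to a no-op. A minimal sketch of that pattern, assuming only the macro name from the CMake above and the standard NVTX push/pop calls (the wrapper type and the main function are illustrative, not part of this patch):

    // Compile with or without -DANN_BENCH_NVTX3_HEADERS_FOUND to see both paths.
    #ifdef ANN_BENCH_NVTX3_HEADERS_FOUND
    #include <nvtx3/nvToolsExt.h>
    struct scoped_nvtx_range {
      explicit scoped_nvtx_range(const char* name) { nvtxRangePushA(name); }
      ~scoped_nvtx_range() { nvtxRangePop(); }
    };
    #else
    struct scoped_nvtx_range {  // no-op fallback when the nvtx3 headers are missing
      explicit scoped_nvtx_range(const char*) {}
    };
    #endif

    int main()
    {
      scoped_nvtx_range r{"example_range"};  // visible in profilers only on the nvtx3 path
      return 0;
    }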
- -use warnings; -use strict; -use autodie qw(open close); -use File::Find; -use Getopt::Std; - -my $QPS = 'QPS'; -my $AVG_LATENCY = 'avg_latency(ms)'; -my $P99_LATENCY = 'p99_latency(ms)'; -my $P999_LATENCY = 'p999_latency(ms)'; -my @CONDITIONS = ([$QPS, 2000], ['recall', 0.9], ['recall', 0.95]); - - -my $USAGE = << 'END'; -usage: [-f] [-l avg|p99|p999] [-o output.csv] groundtruth.neighbors.ibin result_paths... - result_paths... are paths to the search result files. - Can specify multiple paths. - For each of them, if it's a directory, all the .txt files found under - it recursively will be regarded as inputs. - - -f: force to recompute recall and update it in result file if needed - -l: output search latency rather than QPS. Available options: - "avg" for average latency; - "p99" for 99th percentile latency; - "p999" for 99.9th percentile latency. - -o: also write result to a csv file -END - - -my %opt; -getopts('fl:o:', \%opt) - or die $USAGE; -my $force_calc_recall = exists $opt{f} ? 1 : 0; -my $csv_file; -$csv_file = $opt{o} if exists $opt{o}; -my $metric = $QPS; -if (exists $opt{l}) { - my $option = $opt{l}; - if ($option eq 'avg') { - $metric = $AVG_LATENCY; - } - elsif ($option eq 'p99') { - $metric = $P99_LATENCY; - } - elsif ($option eq 'p999') { - $metric = $P999_LATENCY; - } - else { - die - "[error] illegal value for '-l': '$option'. Must be 'avg', 'p99' or 'p999'\n"; - } -} - -@ARGV >= 2 - or die $USAGE; - - -my $truth_file = shift @ARGV; -my ($k, $dataset, $distance, $results) = get_all_results($metric, @ARGV); -if (!defined $k) { - print STDERR "no result file found\n"; - exit -1; -} -print STDERR "dataset = $dataset, distance = $distance, k = $k\n\n"; -calc_missing_recall($results, $truth_file, $force_calc_recall); - -my @results = sort { - $a->{name} cmp $b->{name} - or $a->{recall} <=> $b->{recall} - or $b->{qps} <=> $a->{qps} -} @$results; -printf("%-60s %6s %16s %s\n", '', 'Recall', $metric, 'search_param'); -for my $result (@results) { - my $fmt = ($metric eq $QPS) ? 
'%16.1f' : '%16.3f'; - my $qps = $result->{qps}; - $qps *= 1000 if $metric ne $QPS; # the unit of latency is ms - printf("%-60s %6.4f ${fmt} %s\n", - $result->{name}, $result->{recall}, $qps, $result->{search_param}); -} -if (defined $csv_file) { - open my $fh, '>', $csv_file; - print {$fh} ",Recall,${metric},search_param\n"; - for my $result (@results) { - my $qps = $result->{qps}; - $qps *= 1000 if $metric ne $QPS; - printf {$fh} ( - "%s,%.4f,%.3f,%s\n", $result->{name}, $result->{recall}, - $qps, $result->{search_param} - ); - } -} -print "\n"; -calc_and_print_estimation($results, $metric, \@CONDITIONS); - - - - -sub read_result { - my ($fname) = @_; - open my $fh, '<', $fname; - my %attr; - while (<$fh>) { - chomp; - next if /^\s*$/; - my $pos = index($_, ':'); - $pos != -1 - or die "[error] no ':' is found: '$_'\n"; - my $key = substr($_, 0, $pos); - my $val = substr($_, $pos + 1); - $key =~ s/^\s+|\s+$//g; - $val =~ s/^\s+|\s+$//g; - - # old version benchmark compatible - if ($key eq 'search_time') { - $key = 'average_search_time'; - $val *= $attr{batch_size}; - } - $attr{$key} = $val; - } - return \%attr; -} - -sub overwrite_recall_to_result { - my ($fname, $recall) = @_; - open my $fh_in, '<', $fname; - $recall = sprintf("%f", $recall); - my $out; - while (<$fh_in>) { - s/^recall: .*/recall: $recall/; - $out .= $_; - } - close $fh_in; - - open my $fh_out, '>', $fname; - print {$fh_out} $out; -} - -sub append_recall_to_result { - my ($fname, $recall) = @_; - open my $fh, '>>', $fname; - printf {$fh} ("recall: %f\n", $recall); -} - -sub get_all_results { - my ($metric) = shift @_; - - my %fname; - my $wanted = sub { - if (-f && /\.txt$/) { - $fname{$File::Find::name} = 1; - } - }; - find($wanted, @_); - - my $k; - my $dataset; - my $distance; - my @results; - for my $f (sort keys %fname) { - print STDERR "reading $f ...\n"; - my $attr = read_result($f); - if (!defined $k) { - $k = $attr->{k}; - $dataset = $attr->{dataset}; - $distance = $attr->{distance}; - } - else { - $attr->{k} eq $k - or die "[error] k should be $k, but is $attr->{k} in $f\n"; - $attr->{dataset} eq $dataset - or die - "[error] dataset should be $dataset, but is $attr->{dataset} in $f\n"; - $attr->{distance} eq $distance - or die - "[error] distance should be $distance, but is $attr->{distance} in $f\n"; - } - - my $batch_size = $attr->{batch_size}; - $batch_size =~ s/000000$/M/; - $batch_size =~ s/000$/K/; - my $search_param = $attr->{search_param}; - $search_param =~ s/^{//; - $search_param =~ s/}$//; - $search_param =~ s/,/ /g; - $search_param =~ s/"//g; - - my $qps; - if ($metric eq $QPS) { - $qps = $attr->{batch_size} / $attr->{average_search_time}; - } - elsif ($metric eq $AVG_LATENCY) { - $qps = $attr->{average_search_time}; - } - elsif ($metric eq $P99_LATENCY) { - exists $attr->{p99_search_time} - or die "[error] p99_search_time is not found\n"; - $qps = $attr->{p99_search_time}; - } - elsif ($metric eq $P999_LATENCY) { - exists $attr->{p999_search_time} - or die "[error] p999_search_time is not found\n"; - $qps = $attr->{p999_search_time}; - } - else { - die "[error] unknown latency type: '$metric'\n"; - } - my $result = { - file => $f, - name => "$attr->{name}-batch${batch_size}", - search_param => $search_param, - qps => $qps, - }; - - if (exists $attr->{recall}) { - $result->{recall} = $attr->{recall}; - } - push @results, $result; - } - return $k, $dataset, $distance, \@results; -} - -sub read_ibin { - my ($fname) = @_; - - open my $fh, '<:raw', $fname; - my $raw; - - read($fh, $raw, 8); - my 
($nrows, $dim) = unpack('LL', $raw); - - my $expected_size = 8 + $nrows * $dim * 4; - my $size = (stat($fh))[7]; - $size == $expected_size - or die( - "[error] expected size is $expected_size, but actual size is $size\n"); - - read($fh, $raw, $nrows * $dim * 4) == $nrows * $dim * 4 - or die "[error] read $fname failed\n"; - my @data = unpack('l' x ($nrows * $dim), $raw); - return \@data, $nrows, $dim; -} - -sub pick_k_neighbors { - my ($neighbors, $nrows, $ncols, $k) = @_; - - my @res; - for my $i (0 .. $nrows - 1) { - my %neighbor_set; - for my $j (0 .. $k - 1) { - $neighbor_set{$neighbors->[$i * $ncols + $j]} = 1; - } - push @res, \%neighbor_set; - } - return \@res; -} - - -sub calc_recall { - my ($truth_k_neighbors, $result_neighbors, $nrows, $k) = @_; - - my $recall = 0; - for my $i (0 .. $nrows - 1) { - my $tp = 0; - for my $j (0 .. $k - 1) { - my $neighbor = $result_neighbors->[$i * $k + $j]; - ++$tp if exists $truth_k_neighbors->[$i]{$neighbor}; - } - $recall += $tp; - } - return $recall / $k / $nrows; -} - -sub calc_missing_recall { - my ($results, $truth_file, $force_calc_recall) = @_; - - my $need_calc_recall = grep { !exists $_->{recall} } @$results; - return unless $need_calc_recall || $force_calc_recall; - - my ($truth_neighbors, $nrows, $truth_k) = read_ibin($truth_file); - $truth_k >= $k - or die "[error] ground truth k ($truth_k) < k($k)\n"; - my $truth_k_neighbors = - pick_k_neighbors($truth_neighbors, $nrows, $truth_k, $k); - - for my $result (@$results) { - next if exists $result->{recall} && !$force_calc_recall; - - my $result_bin_file = $result->{file}; - $result_bin_file =~ s/txt$/ibin/; - print STDERR "calculating recall for $result_bin_file ...\n"; - my ($result_neighbors, $result_nrows, $result_k) = - read_ibin($result_bin_file); - $result_k == $k - or die - "[error] k should be $k, but is $result_k in $result_bin_file\n"; - $result_nrows == $nrows - or die - "[error] #row should be $nrows, but is $result_nrows in $result_bin_file\n"; - - my $recall = - calc_recall($truth_k_neighbors, $result_neighbors, $nrows, $k); - if (exists $result->{recall}) { - my $new_value = sprintf("%f", $recall); - if ($result->{recall} ne $new_value) { - print "update recall: $result->{recall} -> $new_value\n"; - overwrite_recall_to_result($result->{file}, $recall); - } - } - else { - append_recall_to_result($result->{file}, $recall); - } - $result->{recall} = $recall; - } -} - - -sub estimate { - my ($results, $condition, $value) = @_; - my %point_of; - for my $result (@$results) { - my $point; - if ($condition eq 'recall') { - $point = [$result->{recall}, $result->{qps}]; - } - else { - $point = [$result->{qps}, $result->{recall}]; - } - push @{$point_of{$result->{name}}}, $point; - } - - my @names = sort keys %point_of; - my @result; - for my $name (@names) { - my @points = sort { $a->[0] <=> $b->[0] } @{$point_of{$name}}; - if ($value < $points[0][0] || $value > $points[$#points][0]) { - push @result, -1; - next; - } - elsif ($value == $points[0][0]) { - push @result, $points[0][1]; - next; - } - - for my $i (1 .. $#points) { - if ($points[$i][0] >= $value) { - push @result, - linear_interpolation($value, @{$points[$i - 1]}, - @{$points[$i]}); - last; - } - } - } - return \@names, \@result; -} - -sub linear_interpolation { - my ($x, $x1, $y1, $x2, $y2) = @_; - return $y1 + ($x - $x1) * ($y2 - $y1) / ($x2 - $x1); -} - -sub merge { - my ($all, $new, $scale) = @_; - @$all == @$new - or die "[error] length is not equal\n"; - for my $i (0 .. 
@$all - 1) { - push @{$all->[$i]}, $new->[$i] * $scale; - } -} - -sub calc_and_print_estimation { - my ($results, $metric, $conditions) = @_; - - my @conditions = grep { - my $target = $_->[0]; - if ($target eq 'recall' || $target eq $metric) { - 1; - } - else { - $target eq $QPS - || $target eq $AVG_LATENCY - || $target eq $P99_LATENCY - || $target eq $P999_LATENCY - or die "[error] unknown condition: '$target'\n"; - 0; - } - } @$conditions; - - my @headers = map { - my $header; - if ($_->[0] eq 'recall') { - $header = $metric . '@recall' . $_->[1]; - } - elsif ($_->[0] eq $metric) { - $header = 'recall@' . $metric . $_->[1]; - } - $header; - } @conditions; - - my $scale = ($metric eq $QPS) ? 1 : 1000; - my $estimations; - for my $condition (@conditions) { - my ($names, $estimate) = estimate($results, @$condition); - if (!defined $estimations) { - @$estimations = map { [$_] } @$names; - } - merge($estimations, $estimate, $scale); - } - - my $fmt = "%-60s" . (" %16s" x @headers) . "\n"; - printf($fmt, '', @headers); - $fmt =~ s/16s/16.4f/g; - for (@$estimations) { - printf($fmt, @$_); - } -} diff --git a/cpp/bench/ann/src/common/ann_types.hpp b/cpp/bench/ann/src/common/ann_types.hpp index 93892de9df..e0c22d1798 100644 --- a/cpp/bench/ann/src/common/ann_types.hpp +++ b/cpp/bench/ann/src/common/ann_types.hpp @@ -1,3 +1,5 @@ + + /* * Copyright (c) 2023, NVIDIA CORPORATION. * @@ -15,40 +17,77 @@ */ #pragma once + +#include #include #include -#include +#ifndef CPU_ONLY +#include // cudaStream_t +#endif namespace raft::bench::ann { -enum class Metric { - kInnerProduct, - kEuclidean, -}; - enum class MemoryType { Host, HostMmap, Device, }; +enum class Metric { + kInnerProduct, + kEuclidean, +}; + +inline auto parse_metric(const std::string& metric_str) -> Metric +{ + if (metric_str == "inner_product") { + return raft::bench::ann::Metric::kInnerProduct; + } else if (metric_str == "euclidean") { + return raft::bench::ann::Metric::kEuclidean; + } else { + throw std::runtime_error("invalid metric: '" + metric_str + "'"); + } +} + +inline auto parse_memory_type(const std::string& memory_type) -> MemoryType +{ + if (memory_type == "host") { + return MemoryType::Host; + } else if (memory_type == "mmap") { + return MemoryType::HostMmap; + } else if (memory_type == "device") { + return MemoryType::Device; + } else { + throw std::runtime_error("invalid memory type: '" + memory_type + "'"); + } +} + struct AlgoProperty { MemoryType dataset_memory_type; // neighbors/distances should have same memory type as queries MemoryType query_memory_type; - bool need_dataset_when_search; +}; + +class AnnBase { + public: + inline AnnBase(Metric metric, int dim) : metric_(metric), dim_(dim) {} + virtual ~AnnBase() = default; + + protected: + Metric metric_; + int dim_; }; template -class ANN { +class ANN : public AnnBase { public: struct AnnSearchParam { virtual ~AnnSearchParam() = default; + [[nodiscard]] virtual auto needs_dataset() const -> bool { return false; }; }; - ANN(Metric metric, int dim) : metric_(metric), dim_(dim) {} - virtual ~ANN() = default; + inline ANN(Metric metric, int dim) : AnnBase(metric, dim) {} virtual void build(const T* dataset, size_t nrow, cudaStream_t stream = 0) = 0; @@ -65,7 +104,7 @@ class ANN { virtual void save(const std::string& file) const = 0; virtual void load(const std::string& file) = 0; - virtual AlgoProperty get_property() const = 0; + virtual AlgoProperty get_preference() const = 0; // Some algorithms don't save the building dataset in their indices. 
   // So they should be given the access to that dataset during searching.
@@ -77,10 +116,14 @@ class ANN {
   // The client code should call set_search_dataset() before searching,
   // and should not release dataset before searching is finished.
   virtual void set_search_dataset(const T* /*dataset*/, size_t /*nrow*/){};
-
- protected:
-  Metric metric_;
-  int dim_;
 };
 
 }  // namespace raft::bench::ann
+
+#define REGISTER_ALGO_INSTANCE(DataT)                                                             \
+  template auto raft::bench::ann::create_algo<DataT>(                                             \
+    const std::string&, const std::string&, int, const nlohmann::json&, const std::vector<int>&)  \
+    ->std::unique_ptr<raft::bench::ann::ANN<DataT>>;                                              \
+  template auto raft::bench::ann::create_search_param<DataT>(const std::string&,                  \
+                                                             const nlohmann::json&)               \
+    ->std::unique_ptr<typename raft::bench::ann::ANN<DataT>::AnnSearchParam>;
diff --git a/cpp/bench/ann/src/common/benchmark.cpp b/cpp/bench/ann/src/common/benchmark.cpp
new file mode 100644
index 0000000000..46e3bec5bb
--- /dev/null
+++ b/cpp/bench/ann/src/common/benchmark.cpp
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "cuda_stub.hpp"  // must go first
+
+#include "ann_types.hpp"
+
+#define JSON_DIAGNOSTICS 1
+#include <nlohmann/json.hpp>
+
+#include <dlfcn.h>
+
+#include <memory>
+#include <stdexcept>
+
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace raft::bench::ann {
+
+struct lib_handle {
+  void* handle{nullptr};
+  explicit lib_handle(const std::string& name)
+  {
+    handle = dlopen(name.c_str(), RTLD_LAZY | RTLD_LOCAL);
+    if (handle == nullptr) {
+      auto error_msg = "Failed to load " + name;
+      auto err       = dlerror();
+      if (err != nullptr && err[0] != '\0') { error_msg += ": " + std::string(err); }
+      throw std::runtime_error(error_msg);
+    }
+  }
+  ~lib_handle() noexcept
+  {
+    if (handle != nullptr) { dlclose(handle); }
+  }
+};
+
+auto load_lib(const std::string& algo) -> void*
+{
+  static std::unordered_map<std::string, lib_handle> libs{};
+  auto found = libs.find(algo);
+
+  if (found != libs.end()) { return found->second.handle; }
+  auto lib_name = "lib" + algo + "_ann_bench.so";
+  return libs.emplace(algo, lib_name).first->second.handle;
+}
+
+auto get_fun_name(void* addr) -> std::string
+{
+  Dl_info dl_info;
+  if (dladdr(addr, &dl_info) != 0) {
+    if (dl_info.dli_sname != nullptr && dl_info.dli_sname[0] != '\0') {
+      return std::string{dl_info.dli_sname};
+    }
+  }
+  throw std::logic_error("Failed to find out name of the looked up function");
+}
+
+template <typename T>
+auto create_algo(const std::string& algo,
+                 const std::string& distance,
+                 int dim,
+                 const nlohmann::json& conf,
+                 const std::vector<int>& dev_list) -> std::unique_ptr<raft::bench::ann::ANN<T>>
+{
+  static auto fname = get_fun_name(reinterpret_cast<void*>(&create_algo<T>));
+  auto handle       = load_lib(algo);
+  auto fun_addr     = dlsym(handle, fname.c_str());
+  if (fun_addr == nullptr) {
+    throw std::runtime_error("Couldn't load the create_algo function (" + algo + ")");
+  }
+  auto fun = reinterpret_cast<decltype(&create_algo<T>)>(fun_addr);
+  return fun(algo, distance, dim, conf, dev_list);
+}
+
+template <typename T>
+std::unique_ptr<typename ANN<T>::AnnSearchParam> create_search_param(
+  const std::string& algo, const nlohmann::json& conf)
+{
static auto fname = get_fun_name(reinterpret_cast(&create_search_param)); + auto handle = load_lib(algo); + auto fun_addr = dlsym(handle, fname.c_str()); + if (fun_addr == nullptr) { + throw std::runtime_error("Couldn't load the create_search_param function (" + algo + ")"); + } + auto fun = reinterpret_cast)>(fun_addr); + return fun(algo, conf); +} + +}; // namespace raft::bench::ann + +REGISTER_ALGO_INSTANCE(float); +REGISTER_ALGO_INSTANCE(std::int8_t); +REGISTER_ALGO_INSTANCE(std::uint8_t); + +#include "benchmark.hpp" + +int main(int argc, char** argv) { return raft::bench::ann::run_main(argc, argv); } diff --git a/cpp/bench/ann/src/common/benchmark.hpp b/cpp/bench/ann/src/common/benchmark.hpp index 971e6a3fd3..6a8c6b933e 100644 --- a/cpp/bench/ann/src/common/benchmark.hpp +++ b/cpp/bench/ann/src/common/benchmark.hpp @@ -13,598 +13,526 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#ifdef NVTX -#include -#endif -#include +#pragma once + +#include "ann_types.hpp" +#include "conf.hpp" +#include "dataset.hpp" +#include "util.hpp" + +#include #include #include #include #include -#include #include #include #include #include -#include +#include #include -#include +#ifdef ANN_BENCH_BUILD_MAIN +#ifdef CPU_ONLY +#define CUDART_FOUND false +#else +#define CUDART_FOUND true +#endif +#else +#define CUDART_FOUND (cudart.found()) +#endif -#include "benchmark_util.hpp" -#include "conf.h" -#include "dataset.h" -#include "util.h" +namespace raft::bench::ann { -using std::cerr; -using std::cout; -using std::endl; -using std::string; -using std::to_string; -using std::unordered_set; -using std::vector; +static inline std::unique_ptr current_algo{nullptr}; -namespace raft::bench::ann { +using kv_series = std::vector>>; -inline bool check_file_exist(const std::vector& files) +inline auto apply_overrides(const std::vector& configs, + const kv_series& overrides, + std::size_t override_idx = 0) -> std::vector { - bool ret = true; - std::unordered_set processed; - for (const auto& file : files) { - if (processed.find(file) == processed.end() && !file_exists(file)) { - log_error("file '%s' doesn't exist or is not a regular file", file.c_str()); - ret = false; + std::vector results{}; + if (override_idx >= overrides.size()) { + auto n = configs.size(); + for (size_t i = 0; i < n; i++) { + auto c = configs[i]; + c["override_suffix"] = n > 1 ? 
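For context on the dladdr/dlsym round-trip in benchmark.cpp above: it can only resolve if every plugin library defines the very same create_algo / create_search_param templates and instantiates them through REGISTER_ALGO_INSTANCE, so that the mangled symbol name taken from the loader also exists in the lib<algo>_ann_bench.so it opens. A minimal sketch of the plugin side, assuming the ann_types.hpp interface from this patch (the file name and the trivial function bodies are illustrative only):

    // hypothetical_plugin_benchmark.cpp -- illustrative skeleton of an algorithm plugin.
    #include "ann_types.hpp"

    #define JSON_DIAGNOSTICS 1
    #include <nlohmann/json.hpp>

    #include <cstdint>
    #include <memory>
    #include <stdexcept>
    #include <string>
    #include <vector>

    namespace raft::bench::ann {

    template <typename T>
    auto create_algo(const std::string& algo,
                     const std::string& distance,
                     int dim,
                     const nlohmann::json& conf,
                     const std::vector<int>& dev_list) -> std::unique_ptr<raft::bench::ann::ANN<T>>
    {
      // A real plugin would dispatch on `algo` here and construct the matching ANN<T> wrapper.
      throw std::runtime_error("unknown algo: '" + algo + "'");
    }

    template <typename T>
    std::unique_ptr<typename ANN<T>::AnnSearchParam> create_search_param(const std::string& algo,
                                                                         const nlohmann::json& conf)
    {
      throw std::runtime_error("unknown algo: '" + algo + "'");
    }

    }  // namespace raft::bench::ann

    // The explicit instantiations below emit the exported symbols that dlsym() looks up.
    REGISTER_ALGO_INSTANCE(float);
    REGISTER_ALGO_INSTANCE(std::int8_t);
    REGISTER_ALGO_INSTANCE(std::uint8_t);

    #ifdef ANN_BENCH_BUILD_MAIN
    #include "benchmark.hpp"
    int main(int argc, char** argv) { return raft::bench::ann::run_main(argc, argv); }
    #endif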
"/" + std::to_string(i) : ""; + results.push_back(c); } - processed.insert(file); + return results; } - return ret; -} - -inline bool check_file_not_exist(const std::vector& files, bool force_overwrite) -{ - bool ret = true; - for (const auto& file : files) { - if (file_exists(file)) { - if (force_overwrite) { - log_warn("'%s' already exists, will overwrite it", file.c_str()); - } else { - log_error("'%s' already exists, use '-f' to force overwriting", file.c_str()); - ret = false; + auto rec_configs = apply_overrides(configs, overrides, override_idx + 1); + auto [key, vals] = overrides[override_idx]; + auto n = vals.size(); + for (size_t i = 0; i < n; i++) { + const auto& val = vals[i]; + for (auto rc : rec_configs) { + if (n > 1) { + rc["override_suffix"] = + static_cast(rc["override_suffix"]) + "/" + std::to_string(i); } + rc[key] = val; + results.push_back(rc); } } - return ret; + return results; } -inline bool check_no_duplicate_file(const std::vector& files) +inline auto apply_overrides(const nlohmann::json& config, + const kv_series& overrides, + std::size_t override_idx = 0) { - bool ret = true; - std::unordered_set processed; - for (const auto& file : files) { - if (processed.find(file) != processed.end()) { - log_error("'%s' occurs more than once as output file, would be overwritten", file.c_str()); - ret = false; - } - processed.insert(file); - } - return ret; + return apply_overrides(std::vector{config}, overrides, 0); } -inline bool mkdir(const std::vector& dirs) +inline void dump_parameters(::benchmark::State& state, nlohmann::json params) { - std::unordered_set processed; - for (const auto& dir : dirs) { - if (processed.find(dir) == processed.end() && !dir_exists(dir)) { - if (create_dir(dir)) { - log_info("mkdir '%s'", dir.c_str()); + std::string label = ""; + bool label_empty = true; + for (auto& [key, val] : params.items()) { + if (val.is_number()) { + state.counters.insert({{key, val}}); + } else if (val.is_boolean()) { + state.counters.insert({{key, val ? 
1.0 : 0.0}}); + } else { + auto kv = key + "=" + val.dump(); + if (label_empty) { + label = kv; } else { - log_error("fail to create output directory '%s'", dir.c_str()); - // won't create any other dir when problem occurs - return false; + label += "#" + kv; } + label_empty = false; } - processed.insert(dir); } - return true; + if (!label_empty) { state.SetLabel(label); } } -inline bool check(const std::vector& indices, - const bool build_mode, - const bool force_overwrite) +inline auto parse_algo_property(AlgoProperty prop, const nlohmann::json& conf) -> AlgoProperty { - std::vector files_should_exist; - std::vector dirs_should_exist; - std::vector output_files; - for (const auto& index : indices) { - if (build_mode) { - output_files.push_back(index.file); - output_files.push_back(index.file + ".txt"); - - const auto pos = index.file.rfind('/'); - if (pos != std::string::npos) { dirs_should_exist.push_back(index.file.substr(0, pos)); } - } else { - files_should_exist.push_back(index.file); - files_should_exist.push_back(index.file + ".txt"); - - output_files.push_back(index.search_result_file + ".0.ibin"); - output_files.push_back(index.search_result_file + ".0.txt"); - - const auto pos = index.search_result_file.rfind('/'); - if (pos != std::string::npos) { - dirs_should_exist.push_back(index.search_result_file.substr(0, pos)); - } - } + if (conf.contains("dataset_memory_type")) { + prop.dataset_memory_type = parse_memory_type(conf.at("dataset_memory_type")); } - - bool ret = true; - if (!check_file_exist(files_should_exist)) { ret = false; } - if (!check_file_not_exist(output_files, force_overwrite)) { ret = false; } - if (!check_no_duplicate_file(output_files)) { ret = false; } - if (ret && !mkdir(dirs_should_exist)) { ret = false; } - return ret; -} - -inline void write_build_info(const std::string& file_prefix, - const std::string& dataset, - const std::string& distance, - const std::string& name, - const std::string& algo, - const std::string& build_param, - const float build_time) -{ - std::ofstream ofs(file_prefix + ".txt"); - if (!ofs) { throw std::runtime_error("can't open build info file: " + file_prefix + ".txt"); } - ofs << "dataset: " << dataset << "\n" - << "distance: " << distance << "\n" - << "\n" - << "name: " << name << "\n" - << "algo: " << algo << "\n" - << "build_param: " << build_param << "\n" - << "build_time: " << build_time << endl; - ofs.close(); - if (!ofs) { throw std::runtime_error("can't write to build info file: " + file_prefix + ".txt"); } -} + if (conf.contains("query_memory_type")) { + prop.query_memory_type = parse_memory_type(conf.at("query_memory_type")); + } + return prop; +}; template -void build(const Dataset* dataset, const std::vector& indices) +void bench_build(::benchmark::State& state, + std::shared_ptr> dataset, + Configuration::Index index, + bool force_overwrite) { - cudaStream_t stream; - RAFT_CUDA_TRY(cudaStreamCreate(&stream)); - - log_info( - "base set from dataset '%s', #vector = %zu", dataset->name().c_str(), dataset->base_set_size()); - - for (const auto& index : indices) { - log_info("creating algo '%s', param=%s", index.algo.c_str(), index.build_param.dump().c_str()); - const auto algo = create_algo(index.algo, - dataset->distance(), - dataset->dim(), - index.refine_ratio, - index.build_param, - index.dev_list, - index.index_conf); - const auto algo_property = algo->get_property(); - - const T* base_set_ptr = nullptr; - if (algo_property.dataset_memory_type == MemoryType::Host) { - log_info("%s", "loading base set to memory"); - 
base_set_ptr = dataset->base_set(); - } else if (algo_property.dataset_memory_type == MemoryType::HostMmap) { - log_info("%s", "mapping base set to memory"); - base_set_ptr = dataset->mapped_base_set(); - } else if (algo_property.dataset_memory_type == MemoryType::Device) { - log_info("%s", "loading base set to GPU"); - base_set_ptr = dataset->base_set_on_gpu(); + if (file_exists(index.file)) { + if (force_overwrite) { + log_info("Overwriting file: %s", index.file.c_str()); + } else { + return state.SkipWithMessage( + "Index file already exists (use --overwrite to overwrite the index)."); } - - log_info("building index '%s'", index.name.c_str()); - RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); -#ifdef NVTX - nvtxRangePush("build"); -#endif - Timer timer; - algo->build(base_set_ptr, dataset->base_set_size(), stream); - RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); - const float elapsed_ms = timer.elapsed_ms(); -#ifdef NVTX - nvtxRangePop(); -#endif - log_info("built index in %.2f seconds", elapsed_ms / 1000.0f); - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - RAFT_CUDA_TRY(cudaPeekAtLastError()); - - algo->save(index.file); - write_build_info(index.file, - dataset->name(), - dataset->distance(), - index.name, - index.algo, - index.build_param.dump(), - elapsed_ms / 1000.0f); - log_info("saved index to %s", index.file.c_str()); } - RAFT_CUDA_TRY(cudaStreamDestroy(stream)); -} - -inline void write_search_result(const std::string& file_prefix, - const std::string& dataset, - const std::string& distance, - const std::string& name, - const std::string& algo, - const std::string& build_param, - const std::string& search_param, - std::size_t batch_size, - unsigned run_count, - unsigned k, - float search_time_average, - float search_time_p99, - float search_time_p999, - float query_per_second, - const int* neighbors, - size_t query_set_size) -{ - log_info("throughput : %e [QPS]", query_per_second); - std::ofstream ofs(file_prefix + ".txt"); - if (!ofs) { throw std::runtime_error("can't open search result file: " + file_prefix + ".txt"); } - ofs << "dataset: " << dataset << "\n" - << "distance: " << distance << "\n" - << "\n" - << "name: " << name << "\n" - << "algo: " << algo << "\n" - << "build_param: " << build_param << "\n" - << "search_param: " << search_param << "\n" - << "\n" - << "batch_size: " << batch_size << "\n" - << "run_count: " << run_count << "\n" - << "k: " << k << "\n" - << "query_per_second: " << query_per_second << "\n" - << "average_search_time: " << search_time_average << endl; - - if (search_time_p99 != std::numeric_limits::max()) { - ofs << "p99_search_time: " << search_time_p99 << endl; - } - if (search_time_p999 != std::numeric_limits::max()) { - ofs << "p999_search_time: " << search_time_p999 << endl; + std::unique_ptr> algo; + try { + algo = ann::create_algo( + index.algo, dataset->distance(), dataset->dim(), index.build_param, index.dev_list); + } catch (const std::exception& e) { + return state.SkipWithError("Failed to create an algo: " + std::string(e.what())); } - ofs.close(); - if (!ofs) { - throw std::runtime_error("can't write to search result file: " + file_prefix + ".txt"); + const auto algo_property = parse_algo_property(algo->get_preference(), index.build_param); + + const T* base_set = dataset->base_set(algo_property.dataset_memory_type); + std::size_t index_size = dataset->base_set_size(); + + cuda_timer gpu_timer; + { + nvtx_case nvtx{state.name()}; + for (auto _ : state) { + [[maybe_unused]] auto ntx_lap = nvtx.lap(); + [[maybe_unused]] auto gpu_lap = 
gpu_timer.lap(); + try { + algo->build(base_set, index_size, gpu_timer.stream()); + } catch (const std::exception& e) { + state.SkipWithError(std::string(e.what())); + } + } } + state.counters.insert( + {{"GPU Time", gpu_timer.total_time() / state.iterations()}, {"index_size", index_size}}); + dump_parameters(state, index.build_param); - BinFile neighbors_file(file_prefix + ".ibin", "w"); - neighbors_file.write(neighbors, query_set_size, k); + if (state.skipped()) { return; } + make_sure_parent_dir_exists(index.file); + algo->save(index.file); } template -inline void search(const Dataset* dataset, const std::vector& indices) +void bench_search(::benchmark::State& state, + std::shared_ptr> dataset, + Configuration::Index index, + std::size_t search_param_ix) { - if (indices.empty()) { return; } - cudaStream_t stream; - RAFT_CUDA_TRY(cudaStreamCreate(&stream)); - - log_info("loading query set from dataset '%s', #vector = %zu", - dataset->name().c_str(), - dataset->query_set_size()); - const T* const query_set = dataset->query_set(); - // query set is usually much smaller than base set, so load it eagerly - const T* const d_query_set = dataset->query_set_on_gpu(); - const size_t query_set_size = dataset->query_set_size(); - - // currently all indices has same batch_size, k and run_count - const std::size_t batch_size = indices[0].batch_size; - const unsigned k = indices[0].k; - const unsigned run_count = indices[0].run_count; - log_info( - "basic search parameters: batch_size = %d, k = %d, run_count = %d", batch_size, k, run_count); - if (query_set_size % batch_size != 0) { - log_warn("query set size (%zu) % batch size (%d) != 0, the size of last batch is %zu", - query_set_size, - batch_size, - query_set_size % batch_size); + const auto& sp_json = index.search_params[search_param_ix]; + + // NB: `k` and `n_queries` are guaranteed to be populated in conf.cpp + const std::uint32_t k = sp_json["k"]; + // Amount of data processes in one go + const std::size_t n_queries = sp_json["n_queries"]; + // Round down the query data to a multiple of the batch size to loop over full batches of data + const std::size_t query_set_size = (dataset->query_set_size() / n_queries) * n_queries; + + if (!file_exists(index.file)) { + state.SkipWithError("Index file is missing. 
Run the benchmark in the build mode first."); + return; + } + // algo is static to cache it between close search runs to save time on index loading + static std::string index_file = ""; + if (index.file != index_file) { + current_algo.reset(); + index_file = index.file; } - const std::size_t num_batches = (query_set_size - 1) / batch_size + 1; - std::size_t* const neighbors = new std::size_t[query_set_size * k]; - int* const neighbors_buf = new int[query_set_size * k]; - float* const distances = new float[query_set_size * k]; - std::vector search_times; - search_times.reserve(num_batches); - std::size_t* d_neighbors; - float* d_distances; - RAFT_CUDA_TRY(cudaMalloc((void**)&d_neighbors, query_set_size * k * sizeof(*d_neighbors))); - RAFT_CUDA_TRY(cudaMalloc((void**)&d_distances, query_set_size * k * sizeof(*d_distances))); - - for (const auto& index : indices) { - log_info("creating algo '%s', param=%s", index.algo.c_str(), index.build_param.dump().c_str()); - const auto algo = create_algo(index.algo, - dataset->distance(), - dataset->dim(), - index.refine_ratio, - index.build_param, - index.dev_list, - index.index_conf); - const auto algo_property = algo->get_property(); - - log_info("loading index '%s' from file '%s'", index.name.c_str(), index.file.c_str()); - algo->load(index.file); - - const T* this_query_set = query_set; - std::size_t* this_neighbors = neighbors; - float* this_distances = distances; - if (algo_property.query_memory_type == MemoryType::Device) { - this_query_set = d_query_set; - this_neighbors = d_neighbors; - this_distances = d_distances; + ANN* algo; + std::unique_ptr::AnnSearchParam> search_param; + try { + if (!current_algo || (algo = dynamic_cast*>(current_algo.get())) == nullptr) { + auto ualgo = ann::create_algo( + index.algo, dataset->distance(), dataset->dim(), index.build_param, index.dev_list); + algo = ualgo.get(); + algo->load(index_file); + current_algo = std::move(ualgo); } - - if (algo_property.need_dataset_when_search) { - log_info("loading base set from dataset '%s', #vector = %zu", - dataset->name().c_str(), - dataset->base_set_size()); - const T* base_set_ptr = nullptr; - if (algo_property.dataset_memory_type == MemoryType::Host) { - log_info("%s", "loading base set to memory"); - base_set_ptr = dataset->base_set(); - } else if (algo_property.dataset_memory_type == MemoryType::HostMmap) { - log_info("%s", "mapping base set to memory"); - base_set_ptr = dataset->mapped_base_set(); - } else if (algo_property.dataset_memory_type == MemoryType::Device) { - log_info("%s", "loading base set to GPU"); - base_set_ptr = dataset->base_set_on_gpu(); - } - algo->set_search_dataset(base_set_ptr, dataset->base_set_size()); + search_param = ann::create_search_param(index.algo, sp_json); + } catch (const std::exception& e) { + return state.SkipWithError("Failed to create an algo: " + std::string(e.what())); + } + algo->set_search_param(*search_param); + + const auto algo_property = parse_algo_property(algo->get_preference(), sp_json); + const T* query_set = dataset->query_set(algo_property.query_memory_type); + buf distances{algo_property.query_memory_type, k * query_set_size}; + buf neighbors{algo_property.query_memory_type, k * query_set_size}; + + if (search_param->needs_dataset()) { + try { + algo->set_search_dataset(dataset->base_set(algo_property.dataset_memory_type), + dataset->base_set_size()); + } catch (const std::exception&) { + state.SkipWithError("The algorithm '" + index.name + + "' requires the base set, but it's not available."); + return; } + } - 
for (int i = 0, end_i = index.search_params.size(); i != end_i; ++i) { - const auto p_param = create_search_param(index.algo, index.search_params[i]); - algo->set_search_param(*p_param); - log_info("search with param: %s", index.search_params[i].dump().c_str()); - - if (algo_property.query_memory_type == MemoryType::Device) { - RAFT_CUDA_TRY(cudaMemset(d_neighbors, 0, query_set_size * k * sizeof(*d_neighbors))); - RAFT_CUDA_TRY(cudaMemset(d_distances, 0, query_set_size * k * sizeof(*d_distances))); - } else { - memset(neighbors, 0, query_set_size * k * sizeof(*neighbors)); - memset(distances, 0, query_set_size * k * sizeof(*distances)); + std::ptrdiff_t batch_offset = 0; + std::size_t queries_processed = 0; + cuda_timer gpu_timer; + { + nvtx_case nvtx{state.name()}; + for (auto _ : state) { + // measure the GPU time using the RAII helper + [[maybe_unused]] auto ntx_lap = nvtx.lap(); + [[maybe_unused]] auto gpu_lap = gpu_timer.lap(); + // run the search + try { + algo->search(query_set + batch_offset * dataset->dim(), + n_queries, + k, + neighbors.data + batch_offset * k, + distances.data + batch_offset * k, + gpu_timer.stream()); + } catch (const std::exception& e) { + state.SkipWithError(std::string(e.what())); } - - float best_search_time_average = std::numeric_limits::max(); - float best_search_time_p99 = std::numeric_limits::max(); - float best_search_time_p999 = std::numeric_limits::max(); - float total_search_time = 0; - for (unsigned run = 0; run < run_count; ++run) { - log_info("run %d / %d", run + 1, run_count); - for (std::size_t batch_id = 0; batch_id < num_batches; ++batch_id) { - const std::size_t row = batch_id * batch_size; - const std::size_t actual_batch_size = - (batch_id == num_batches - 1) ? query_set_size - row : batch_size; - RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); -#ifdef NVTX - string nvtx_label = "batch" + to_string(batch_id); - if (run_count != 1) { nvtx_label = "run" + to_string(run) + "-" + nvtx_label; } - if (batch_id == 10) { - run = run_count - 1; + // advance to the next batch + batch_offset = (batch_offset + n_queries) % query_set_size; + queries_processed += n_queries; + } + } + state.SetItemsProcessed(queries_processed); + state.counters.insert({{"k", k}, {"n_queries", n_queries}}); + if (CUDART_FOUND) { + state.counters.insert({{"GPU Time", gpu_timer.total_time() / state.iterations()}, + {"GPU QPS", queries_processed / gpu_timer.total_time()}}); + } + dump_parameters(state, sp_json); + if (state.skipped()) { return; } + + // evaluate recall + if (dataset->max_k() >= k) { + const std::int32_t* gt = dataset->gt_set(); + const std::uint32_t max_k = dataset->max_k(); + buf neighbors_host = neighbors.move(MemoryType::Host); + + std::size_t rows = std::min(queries_processed, query_set_size); + std::size_t match_count = 0; + std::size_t total_count = rows * static_cast(k); + for (std::size_t i = 0; i < rows; i++) { + for (std::uint32_t j = 0; j < k; j++) { + auto act_idx = std::int32_t(neighbors_host.data[i * k + j]); + for (std::uint32_t l = 0; l < k; l++) { + auto exp_idx = gt[i * max_k + l]; + if (act_idx == exp_idx) { + match_count++; break; } -#endif - Timer timer; -#ifdef NVTX - nvtxRangePush(nvtx_label.c_str()); -#endif - algo->search(this_query_set + row * dataset->dim(), - actual_batch_size, - k, - this_neighbors + row * k, - this_distances + row * k, - stream); - RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); - const float elapsed_ms = timer.elapsed_ms(); -#ifdef NVTX - nvtxRangePop(); -#endif - // If the size of the last batch is less than 
batch_size, don't count it for - // search time. But neighbors of the last batch will still be filled, so it's - // counted for recall calculation. - if (actual_batch_size == batch_size) { - search_times.push_back(elapsed_ms / 1000.0f); // in seconds - } - } - - const float total_search_time_run = - std::accumulate(search_times.cbegin(), search_times.cend(), 0.0f); - const float search_time_average = total_search_time_run / search_times.size(); - total_search_time += total_search_time_run; - best_search_time_average = std::min(best_search_time_average, search_time_average); - - if (search_times.size() >= 100) { - std::sort(search_times.begin(), search_times.end()); - - const auto calc_percentile_pos = [](float percentile, size_t N) { - return static_cast(std::ceil(percentile / 100.0 * N)) - 1; - }; - - const float search_time_p99 = search_times[calc_percentile_pos(99, search_times.size())]; - best_search_time_p99 = std::min(best_search_time_p99, search_time_p99); - - if (search_times.size() >= 1000) { - const float search_time_p999 = - search_times[calc_percentile_pos(99.9, search_times.size())]; - best_search_time_p999 = std::min(best_search_time_p999, search_time_p999); - } } - search_times.clear(); } - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - RAFT_CUDA_TRY(cudaPeekAtLastError()); - const auto query_per_second = - (run_count * raft::round_down_safe(query_set_size, batch_size)) / total_search_time; - - if (algo_property.query_memory_type == MemoryType::Device) { - RAFT_CUDA_TRY(cudaMemcpy(neighbors, - d_neighbors, - query_set_size * k * sizeof(*d_neighbors), - cudaMemcpyDeviceToHost)); - RAFT_CUDA_TRY(cudaMemcpy(distances, - d_distances, - query_set_size * k * sizeof(*d_distances), - cudaMemcpyDeviceToHost)); - } - - for (std::size_t j = 0; j < query_set_size * k; ++j) { - neighbors_buf[j] = neighbors[j]; - } - write_search_result(index.search_result_file + "." 
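The recall accounting in bench_search above treats a returned id as a hit when it appears among the first k of the max_k stored ground-truth neighbors for the same query. A small self-contained example of the same counting scheme with made-up data (nothing here is read from the benchmark's files):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main()
    {
      const std::size_t n_queries = 2, k = 2, max_k = 3;
      // Ground truth: max_k = 3 neighbor ids per query (row-major).
      std::vector<std::int32_t> gt = {5, 7, 9, 1, 2, 3};
      // Search results: k = 2 ids per query; query 0 has one hit (7), query 1 has two (2 and 1).
      std::vector<std::int32_t> neighbors = {7, 4, 2, 1};

      std::size_t match_count = 0;
      for (std::size_t i = 0; i < n_queries; i++) {
        for (std::size_t j = 0; j < k; j++) {
          auto act_idx = neighbors[i * k + j];
          for (std::size_t l = 0; l < k; l++) {  // only the first k of the max_k columns count
            if (act_idx == gt[i * max_k + l]) {
              match_count++;
              break;
            }
          }
        }
      }
      double recall = static_cast<double>(match_count) / static_cast<double>(n_queries * k);
      std::printf("recall = %.2f\n", recall);  // prints 0.75
      return 0;
    }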
+ to_string(i), - dataset->name(), - dataset->distance(), - index.name, - index.algo, - index.build_param.dump(), - index.search_params[i].dump(), - batch_size, - index.run_count, - k, - best_search_time_average, - best_search_time_p99, - best_search_time_p999, - query_per_second, - neighbors_buf, - query_set_size); } - - log_info("finish searching for index '%s'", index.name.c_str()); + double actual_recall = static_cast(match_count) / static_cast(total_count); + state.counters.insert({{"Recall", actual_recall}}); } +} - delete[] neighbors; - delete[] neighbors_buf; - delete[] distances; - RAFT_CUDA_TRY(cudaFree(d_neighbors)); - RAFT_CUDA_TRY(cudaFree(d_distances)); - RAFT_CUDA_TRY(cudaStreamDestroy(stream)); +inline void printf_usage() +{ + ::benchmark::PrintDefaultHelp(); + fprintf( + stdout, + " [--build|--search] \n" + " [--overwrite]\n" + " [--data_prefix=]\n" + " [--index_prefix=]\n" + " [--override_kv=]\n" + " .json\n" + "\n" + "Note the non-standard benchmark parameters:\n" + " --build: build mode, will build index\n" + " --search: search mode, will search using the built index\n" + " one and only one of --build and --search should be specified\n" + " --overwrite: force overwriting existing index files\n" + " --data_prefix=:" + " prepend to dataset file paths specified in the .json (default = 'data/').\n" + " --index_prefix=:" + " prepend to index file paths specified in the .json (default = 'index/').\n" + " --override_kv=:" + " override a build/search key one or more times multiplying the number of configurations;" + " you can use this parameter multiple times to get the Cartesian product of benchmark" + " configs.\n"); } -inline const std::string usage(const string& argv0) +template +void register_build(std::shared_ptr> dataset, + std::vector indices, + bool force_overwrite) { - return "usage: " + argv0 + " -b|s [-c] [-f] [-i index_names] conf.json\n" + - " -b: build mode, will build index\n" + - " -s: search mode, will search using built index\n" + - " one and only one of -b and -s should be specified\n" + - " -c: just check command line options and conf.json are sensible\n" + - " won't build or search\n" + " -f: force overwriting existing output files\n" + - " -i: by default will build/search all the indices found in conf.json\n" + - " '-i' can be used to select a subset of indices\n" + - " 'index_names' is a list of comma-separated index names\n" + - " '*' is allowed as the last character of a name to select all matched indices\n" + - " for example, -i \"hnsw1,hnsw2,faiss\" or -i \"hnsw*,faiss\""; + for (auto index : indices) { + auto suf = static_cast(index.build_param["override_suffix"]); + auto file_suf = suf; + index.build_param.erase("override_suffix"); + std::replace(file_suf.begin(), file_suf.end(), '/', '-'); + index.file += file_suf; + auto* b = ::benchmark::RegisterBenchmark( + index.name + suf, bench_build, dataset, index, force_overwrite); + b->Unit(benchmark::kSecond); + b->UseRealTime(); + } } template -inline int dispatch_benchmark(const Configuration& conf, - const std::string& index_patterns, - bool force_overwrite, - bool only_check, - bool build_mode, - bool search_mode) +void register_search(std::shared_ptr> dataset, + std::vector indices) { - try { - const auto dataset_conf = conf.get_dataset_conf(); - - BinDataset dataset(dataset_conf.name, - dataset_conf.base_file, - dataset_conf.subset_first_row, - dataset_conf.subset_size, - dataset_conf.query_file, - dataset_conf.distance); - - vector indices = conf.get_indices(index_patterns); - if (!check(indices, 
build_mode, force_overwrite)) { return -1; } - - std::string message = "will "; - message += build_mode ? "build:" : "search:"; - for (const auto& index : indices) { - message += "\n " + index.name; + for (auto index : indices) { + for (std::size_t i = 0; i < index.search_params.size(); i++) { + auto suf = static_cast(index.search_params[i]["override_suffix"]); + index.search_params[i].erase("override_suffix"); + auto* b = + ::benchmark::RegisterBenchmark(index.name + suf, bench_search, dataset, index, i); + b->Unit(benchmark::kMillisecond); + b->UseRealTime(); } - log_info("%s", message.c_str()); + } +} - if (only_check) { - log_info("%s", "all check passed, quit due to option -c"); - return 0; +template +void dispatch_benchmark(const Configuration& conf, + bool force_overwrite, + bool build_mode, + bool search_mode, + std::string data_prefix, + std::string index_prefix, + kv_series override_kv) +{ + if (CUDART_FOUND) { + for (auto [key, value] : cuda_info()) { + ::benchmark::AddCustomContext(key, value); } - - if (build_mode) { - build(&dataset, indices); - } else if (search_mode) { - search(&dataset, indices); + } + const auto dataset_conf = conf.get_dataset_conf(); + auto base_file = combine_path(data_prefix, dataset_conf.base_file); + auto query_file = combine_path(data_prefix, dataset_conf.query_file); + auto gt_file = dataset_conf.groundtruth_neighbors_file; + if (gt_file.has_value()) { gt_file.emplace(combine_path(data_prefix, gt_file.value())); } + auto dataset = std::make_shared>(dataset_conf.name, + base_file, + dataset_conf.subset_first_row, + dataset_conf.subset_size, + query_file, + dataset_conf.distance, + gt_file); + ::benchmark::AddCustomContext("dataset", dataset_conf.name); + ::benchmark::AddCustomContext("distance", dataset_conf.distance); + std::vector indices = conf.get_indices(); + if (build_mode) { + if (file_exists(base_file)) { + log_info("Using the dataset file '%s'", base_file.c_str()); + ::benchmark::AddCustomContext("n_records", std::to_string(dataset->base_set_size())); + ::benchmark::AddCustomContext("dim", std::to_string(dataset->dim())); + } else { + log_warn("Dataset file '%s' does not exist; benchmarking index building is impossible.", + base_file.c_str()); } - } catch (const std::exception& e) { - log_error("exception occurred: %s", e.what()); - return -1; + std::vector more_indices{}; + for (auto& index : indices) { + for (auto param : apply_overrides(index.build_param, override_kv)) { + auto modified_index = index; + modified_index.build_param = param; + modified_index.file = combine_path(index_prefix, modified_index.file); + more_indices.push_back(modified_index); + } + } + register_build(dataset, more_indices, force_overwrite); + } else if (search_mode) { + if (file_exists(query_file)) { + log_info("Using the query file '%s'", query_file.c_str()); + ::benchmark::AddCustomContext("max_n_queries", std::to_string(dataset->query_set_size())); + ::benchmark::AddCustomContext("dim", std::to_string(dataset->dim())); + if (gt_file.has_value()) { + if (file_exists(*gt_file)) { + log_info("Using the ground truth file '%s'", gt_file->c_str()); + ::benchmark::AddCustomContext("max_k", std::to_string(dataset->max_k())); + } else { + log_warn("Ground truth file '%s' does not exist; the recall won't be reported.", + gt_file->c_str()); + } + } else { + log_warn( + "Ground truth file is not provided; the recall won't be reported. 
NB: use " + "the 'groundtruth_neighbors_file' alongside the 'query_file' key to specify the path to " + "the ground truth in your conf.json."); + } + } else { + log_warn("Query file '%s' does not exist; benchmarking search is impossible.", + query_file.c_str()); + } + for (auto& index : indices) { + index.search_params = apply_overrides(index.search_params, override_kv); + index.file = combine_path(index_prefix, index.file); + } + register_search(dataset, indices); } +} - return 0; +inline auto parse_bool_flag(const char* arg, const char* pat, bool& result) -> bool +{ + if (strcmp(arg, pat) == 0) { + result = true; + return true; + } + return false; } -inline int run_main(int argc, char** argv) +inline auto parse_string_flag(const char* arg, const char* pat, std::string& result) -> bool { - bool force_overwrite = false; - bool build_mode = false; - bool search_mode = false; - bool only_check = false; - std::string index_patterns("*"); - std::string dataset_memory("device"); - - int opt; - while ((opt = getopt(argc, argv, "bscfi:h")) != -1) { - switch (opt) { - case 'b': build_mode = true; break; - case 's': search_mode = true; break; - case 'c': only_check = true; break; - case 'f': force_overwrite = true; break; - case 'i': index_patterns = optarg; break; - case 'm': dataset_memory = optarg; break; - case 'h': cout << usage(argv[0]) << endl; return -1; - default: cerr << "\n" << usage(argv[0]) << endl; return -1; - } + auto n = strlen(pat); + if (strncmp(pat, arg, strlen(pat)) == 0) { + result = arg + n + 1; + return true; } - if (build_mode == search_mode) { - std::cerr << "one and only one of -b and -s should be specified\n\n" << usage(argv[0]) << endl; + return false; +} + +inline auto run_main(int argc, char** argv) -> int +{ + bool force_overwrite = false; + bool build_mode = false; + bool search_mode = false; + std::string data_prefix = "data"; + std::string index_prefix = "index"; + std::string new_override_kv = ""; + kv_series override_kv{}; + + char arg0_default[] = "benchmark"; // NOLINT + char* args_default = arg0_default; + if (!argv) { + argc = 1; + argv = &args_default; + } + if (argc == 1) { + printf_usage(); return -1; } - if (argc - optind != 1) { - std::cerr << usage(argv[0]) << endl; + + char* conf_path = argv[--argc]; + std::ifstream conf_stream(conf_path); + + for (int i = 1; i < argc; i++) { + if (parse_bool_flag(argv[i], "--overwrite", force_overwrite) || + parse_bool_flag(argv[i], "--build", build_mode) || + parse_bool_flag(argv[i], "--search", search_mode) || + parse_string_flag(argv[i], "--data_prefix", data_prefix) || + parse_string_flag(argv[i], "--index_prefix", index_prefix) || + parse_string_flag(argv[i], "--override_kv", new_override_kv)) { + if (!new_override_kv.empty()) { + auto kvv = split(new_override_kv, ':'); + auto key = kvv[0]; + std::vector vals{}; + for (std::size_t j = 1; j < kvv.size(); j++) { + vals.push_back(nlohmann::json::parse(kvv[j])); + } + override_kv.emplace_back(key, vals); + new_override_kv = ""; + } + for (int j = i; j < argc - 1; j++) { + argv[j] = argv[j + 1]; + } + argc--; + i--; + } + } + + if (build_mode == search_mode) { + log_error("One and only one of --build and --search should be specified"); + printf_usage(); return -1; } - string conf_file = argv[optind]; - std::ifstream conf_stream(conf_file.c_str()); if (!conf_stream) { - log_error("can't open configuration file: %s", argv[optind]); + log_error("Can't open configuration file: %s", conf_path); return -1; } - try { - Configuration conf(conf_stream); - std::string dtype = 
conf.get_dataset_conf().dtype; - - if (dtype == "float") { - return dispatch_benchmark( - conf, index_patterns, force_overwrite, only_check, build_mode, search_mode); - } else if (dtype == "uint8") { - return dispatch_benchmark( - conf, index_patterns, force_overwrite, only_check, build_mode, search_mode); - } else if (dtype == "int8") { - return dispatch_benchmark( - conf, index_patterns, force_overwrite, only_check, build_mode, search_mode); - } else { - log_error("datatype %s not supported", dtype); - } - - } catch (const std::exception& e) { - log_error("exception occurred: %s", e.what()); + if (!CUDART_FOUND) { log_warn("cudart library is not found, GPU-based indices won't work."); } + + Configuration conf(conf_stream); + std::string dtype = conf.get_dataset_conf().dtype; + + if (dtype == "float") { + dispatch_benchmark( + conf, force_overwrite, build_mode, search_mode, data_prefix, index_prefix, override_kv); + } else if (dtype == "uint8") { + dispatch_benchmark( + conf, force_overwrite, build_mode, search_mode, data_prefix, index_prefix, override_kv); + } else if (dtype == "int8") { + dispatch_benchmark( + conf, force_overwrite, build_mode, search_mode, data_prefix, index_prefix, override_kv); + } else { + log_error("datatype '%s' is not supported", dtype.c_str()); return -1; } - return -1; + ::benchmark::Initialize(&argc, argv, printf_usage); + if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return -1; + ::benchmark::RunSpecifiedBenchmarks(); + ::benchmark::Shutdown(); + // Release a possibly cached ANN object, so that it cannot be alive longer than the handle to a + // shared library it depends on (dynamic benchmark executable). + current_algo.reset(); + return 0; } + }; // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/common/benchmark_util.hpp b/cpp/bench/ann/src/common/benchmark_util.hpp deleted file mode 100644 index 7005883ffc..0000000000 --- a/cpp/bench/ann/src/common/benchmark_util.hpp +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
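One practical note on the --override_kv handling in run_main above: the flag value is split on ':' and every piece after the key is parsed as JSON, so numbers, strings and booleans can all be injected into the build/search configurations. A self-contained sketch of that parsing step (the local split helper stands in for the benchmark's own split() utility, which is not shown in this patch, and the "nprobe" key is just an example value):

    #define JSON_DIAGNOSTICS 1
    #include <nlohmann/json.hpp>

    #include <cstdio>
    #include <string>
    #include <vector>

    // Split "nprobe:10:20" into {"nprobe", "10", "20"}.
    static std::vector<std::string> split(const std::string& s, char sep)
    {
      std::vector<std::string> out;
      std::size_t start = 0;
      while (true) {
        auto pos = s.find(sep, start);
        out.push_back(s.substr(start, pos - start));
        if (pos == std::string::npos) { break; }
        start = pos + 1;
      }
      return out;
    }

    int main()
    {
      std::string arg = "nprobe:10:20";  // as passed via --override_kv=nprobe:10:20
      auto kvv        = split(arg, ':');
      std::string key = kvv[0];
      std::vector<nlohmann::json> vals;
      for (std::size_t j = 1; j < kvv.size(); j++) {
        vals.push_back(nlohmann::json::parse(kvv[j]));
      }
      // `key`/`vals` correspond to one kv_series entry; apply_overrides() then multiplies
      // the benchmark configurations by vals.size().
      std::printf("%s has %zu override values\n", key.c_str(), vals.size());
      return 0;
    }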
- */ -#pragma once - -#include "ann_types.hpp" -#include - -namespace raft::bench::ann { - -inline Metric parse_metric(const std::string& metric_str) -{ - if (metric_str == "inner_product") { - return raft::bench::ann::Metric::kInnerProduct; - } else if (metric_str == "euclidean") { - return raft::bench::ann::Metric::kEuclidean; - } else { - throw std::runtime_error("invalid metric: '" + metric_str + "'"); - } -} -}; // namespace raft::bench::ann \ No newline at end of file diff --git a/cpp/bench/ann/src/common/conf.cpp b/cpp/bench/ann/src/common/conf.cpp index dbb1b5347c..098313db45 100644 --- a/cpp/bench/ann/src/common/conf.cpp +++ b/cpp/bench/ann/src/common/conf.cpp @@ -21,7 +21,7 @@ #include #include -#include "util.h" +#include "util.hpp" namespace raft::bench::ann { using std::runtime_error; diff --git a/cpp/bench/ann/src/common/conf.hpp b/cpp/bench/ann/src/common/conf.hpp new file mode 100644 index 0000000000..b22986814d --- /dev/null +++ b/cpp/bench/ann/src/common/conf.hpp @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "util.hpp" + +#include +#include +#include +#include +#include + +#define JSON_DIAGNOSTICS 1 +#include + +namespace raft::bench::ann { + +class Configuration { + public: + struct Index { + std::string name; + std::string algo; + nlohmann::json build_param; + std::string file; + std::vector dev_list; + nlohmann::json index_conf; + + int batch_size; + int k; + std::vector search_params; + }; + + struct DatasetConf { + std::string name; + std::string base_file; + // use only a subset of base_file, + // the range of rows is [subset_first_row, subset_first_row + subset_size) + // however, subset_size = 0 means using all rows after subset_first_row + // that is, the subset is [subset_first_row, #rows in base_file) + size_t subset_first_row{0}; + size_t subset_size{0}; + std::string query_file; + std::string distance; + std::optional groundtruth_neighbors_file{std::nullopt}; + + // data type of input dataset, possible values ["float", "int8", "uint8"] + std::string dtype; + }; + + explicit inline Configuration(std::istream& conf_stream) + { + // to enable comments in json + auto conf = nlohmann::json::parse(conf_stream, nullptr, true, true); + + parse_dataset_(conf.at("dataset")); + parse_index_(conf.at("index"), conf.at("search_basic_param")); + } + + [[nodiscard]] inline auto get_dataset_conf() const -> DatasetConf { return dataset_conf_; } + [[nodiscard]] inline auto get_indices() const -> std::vector { return indices_; }; + + private: + inline void parse_dataset_(const nlohmann::json& conf) + { + dataset_conf_.name = conf.at("name"); + dataset_conf_.base_file = conf.at("base_file"); + dataset_conf_.query_file = conf.at("query_file"); + dataset_conf_.distance = conf.at("distance"); + + if (conf.contains("groundtruth_neighbors_file")) { + dataset_conf_.groundtruth_neighbors_file = conf.at("groundtruth_neighbors_file"); + } + if (conf.contains("subset_first_row")) { + 
dataset_conf_.subset_first_row = conf.at("subset_first_row"); + } + if (conf.contains("subset_size")) { dataset_conf_.subset_size = conf.at("subset_size"); } + + if (conf.contains("dtype")) { + dataset_conf_.dtype = conf.at("dtype"); + } else { + auto filename = dataset_conf_.base_file; + if (!filename.compare(filename.size() - 4, 4, "fbin")) { + dataset_conf_.dtype = "float"; + } else if (!filename.compare(filename.size() - 5, 5, "u8bin")) { + dataset_conf_.dtype = "uint8"; + } else if (!filename.compare(filename.size() - 5, 5, "i8bin")) { + dataset_conf_.dtype = "int8"; + } else { + log_error("Could not determine data type of the dataset %s", filename.c_str()); + } + } + } + inline void parse_index_(const nlohmann::json& index_conf, + const nlohmann::json& search_basic_conf) + { + const int batch_size = search_basic_conf.at("batch_size"); + const int k = search_basic_conf.at("k"); + + for (const auto& conf : index_conf) { + Index index; + index.name = conf.at("name"); + index.algo = conf.at("algo"); + index.build_param = conf.at("build_param"); + index.file = conf.at("file"); + index.batch_size = batch_size; + index.k = k; + index.index_conf = conf; + + if (conf.contains("multigpu")) { + for (auto it : conf.at("multigpu")) { + index.dev_list.push_back(it); + } + if (index.dev_list.empty()) { throw std::runtime_error("dev_list shouln't be empty!"); } + index.dev_list.shrink_to_fit(); + index.build_param["multigpu"] = conf["multigpu"]; + } + + for (auto param : conf.at("search_params")) { + /* ### Special parameters for backward compatibility ### + + - Local values of `k` and `n_queries` take priority. + - The legacy "batch_size" renamed to `n_queries`. + - Basic search params are used otherwise. + */ + if (!param.contains("k")) { param["k"] = k; } + if (!param.contains("n_queries")) { + if (param.contains("batch_size")) { + param["n_queries"] = param["batch_size"]; + param.erase("batch_size"); + } else { + param["n_queries"] = batch_size; + } + } + index.search_params.push_back(param); + } + + indices_.push_back(index); + } + } + + DatasetConf dataset_conf_; + std::vector indices_; +}; + +} // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/common/cuda_stub.hpp b/cpp/bench/ann/src/common/cuda_stub.hpp new file mode 100644 index 0000000000..e3f9aa9e84 --- /dev/null +++ b/cpp/bench/ann/src/common/cuda_stub.hpp @@ -0,0 +1,159 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#ifdef ANN_BENCH_LINK_CUDART +#include +#else +#define CPU_ONLY +typedef void* cudaStream_t; +typedef void* cudaEvent_t; +#endif + +#include + +namespace raft::bench::ann { + +struct cuda_lib_handle { + void* handle{nullptr}; + explicit cuda_lib_handle() + { +#ifdef ANN_BENCH_LINK_CUDART + handle = dlopen(ANN_BENCH_LINK_CUDART, RTLD_NOW | RTLD_GLOBAL | RTLD_DEEPBIND | RTLD_NODELETE); +#endif + } + ~cuda_lib_handle() noexcept + { + if (handle != nullptr) { dlclose(handle); } + } + + [[nodiscard]] inline auto found() const -> bool { return handle != nullptr; } +}; + +static inline cuda_lib_handle cudart{}; + +#ifndef CPU_ONLY +namespace stub { + +[[gnu::weak, gnu::noinline]] cudaError_t cudaMemcpy(void* dst, + const void* src, + size_t count, + enum cudaMemcpyKind kind) +{ + return cudaSuccess; +} + +[[gnu::weak, gnu::noinline]] cudaError_t cudaMalloc(void** ptr, size_t size) +{ + *ptr = nullptr; + return cudaSuccess; +} +[[gnu::weak, gnu::noinline]] cudaError_t cudaMemset(void* devPtr, int value, size_t count) +{ + return cudaSuccess; +} +[[gnu::weak, gnu::noinline]] cudaError_t cudaFree(void* devPtr) { return cudaSuccess; } +[[gnu::weak, gnu::noinline]] cudaError_t cudaStreamCreate(cudaStream_t* pStream) +{ + *pStream = 0; + return cudaSuccess; +} +[[gnu::weak, gnu::noinline]] cudaError_t cudaStreamCreateWithFlags(cudaStream_t* pStream, + unsigned int flags) +{ + *pStream = 0; + return cudaSuccess; +} +[[gnu::weak, gnu::noinline]] cudaError_t cudaStreamDestroy(cudaStream_t pStream) +{ + return cudaSuccess; +} +[[gnu::weak, gnu::noinline]] cudaError_t cudaStreamSynchronize(cudaStream_t pStream) +{ + return cudaSuccess; +} +[[gnu::weak, gnu::noinline]] cudaError_t cudaEventCreate(cudaEvent_t* event) +{ + *event = 0; + return cudaSuccess; +} +[[gnu::weak, gnu::noinline]] cudaError_t cudaEventRecord(cudaEvent_t event, cudaStream_t stream) +{ + return cudaSuccess; +} +[[gnu::weak, gnu::noinline]] cudaError_t cudaEventSynchronize(cudaEvent_t event) +{ + return cudaSuccess; +} +[[gnu::weak, gnu::noinline]] cudaError_t cudaEventElapsedTime(float* ms, + cudaEvent_t start, + cudaEvent_t end) +{ + *ms = 0; + return cudaSuccess; +} +[[gnu::weak, gnu::noinline]] cudaError_t cudaEventDestroy(cudaEvent_t event) { return cudaSuccess; } +[[gnu::weak, gnu::noinline]] cudaError_t cudaGetDevice(int* device) +{ + *device = 0; + return cudaSuccess; +}; +[[gnu::weak, gnu::noinline]] cudaError_t cudaDriverGetVersion(int* driver) +{ + *driver = 0; + return cudaSuccess; +}; +[[gnu::weak, gnu::noinline]] cudaError_t cudaRuntimeGetVersion(int* runtime) +{ + *runtime = 0; + return cudaSuccess; +}; +[[gnu::weak, gnu::noinline]] cudaError_t cudaGetDeviceProperties(struct cudaDeviceProp* prop, + int device) +{ + *prop = cudaDeviceProp{}; + return cudaSuccess; +} + +} // namespace stub + +#define RAFT_DECLARE_CUDART(fun) \ + static inline decltype(&stub::fun) fun = \ + cudart.found() ? 
reinterpret_cast(dlsym(cudart.handle, #fun)) \ + : &stub::fun + +RAFT_DECLARE_CUDART(cudaMemcpy); +RAFT_DECLARE_CUDART(cudaMalloc); +RAFT_DECLARE_CUDART(cudaMemset); +RAFT_DECLARE_CUDART(cudaFree); +RAFT_DECLARE_CUDART(cudaStreamCreate); +RAFT_DECLARE_CUDART(cudaStreamCreateWithFlags); +RAFT_DECLARE_CUDART(cudaStreamDestroy); +RAFT_DECLARE_CUDART(cudaStreamSynchronize); +RAFT_DECLARE_CUDART(cudaEventCreate); +RAFT_DECLARE_CUDART(cudaEventRecord); +RAFT_DECLARE_CUDART(cudaEventSynchronize); +RAFT_DECLARE_CUDART(cudaEventElapsedTime); +RAFT_DECLARE_CUDART(cudaEventDestroy); +RAFT_DECLARE_CUDART(cudaGetDevice); +RAFT_DECLARE_CUDART(cudaDriverGetVersion); +RAFT_DECLARE_CUDART(cudaRuntimeGetVersion); +RAFT_DECLARE_CUDART(cudaGetDeviceProperties); + +#undef RAFT_DECLARE_CUDART +#endif + +}; // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/common/dataset.h b/cpp/bench/ann/src/common/dataset.hpp similarity index 85% rename from cpp/bench/ann/src/common/dataset.h rename to cpp/bench/ann/src/common/dataset.hpp index ae05cd02a1..7fa82a632f 100644 --- a/cpp/bench/ann/src/common/dataset.h +++ b/cpp/bench/ann/src/common/dataset.hpp @@ -15,11 +15,10 @@ */ #pragma once -#include +#include "util.hpp" #ifndef CPU_ONLY #include -#include #else typedef uint16_t half; #endif @@ -29,7 +28,9 @@ typedef uint16_t half; #include #include +#include #include +#include #include #include #include @@ -54,7 +55,8 @@ class BinFile { uint32_t subset_size = 0); ~BinFile() { - if (fp_) { fclose(fp_); } + if (mapped_ptr_ != nullptr) { unmap(); } + if (fp_ != nullptr) { fclose(fp_); } } BinFile(const BinFile&) = delete; BinFile& operator=(const BinFile&) = delete; @@ -101,6 +103,7 @@ class BinFile { int fid = fileno(fp_); mapped_ptr_ = mmap(nullptr, file_size_, PROT_READ, MAP_PRIVATE, fid, 0); if (mapped_ptr_ == MAP_FAILED) { + mapped_ptr_ = nullptr; throw std::runtime_error("mmap error: Value of errno " + std::to_string(errno) + ", " + std::string(strerror(errno))); } @@ -124,11 +127,11 @@ class BinFile { uint32_t subset_first_row_; uint32_t subset_size_; - mutable FILE* fp_; + mutable FILE* fp_{nullptr}; mutable uint32_t nrows_; mutable uint32_t ndims_; mutable size_t file_size_; - mutable void* mapped_ptr_; + mutable void* mapped_ptr_{nullptr}; }; template @@ -254,6 +257,7 @@ class Dataset { std::string name() const { return name_; } std::string distance() const { return distance_; } virtual int dim() const = 0; + virtual uint32_t max_k() const = 0; virtual size_t base_set_size() const = 0; virtual size_t query_set_size() const = 0; @@ -271,12 +275,37 @@ class Dataset { return query_set_; } + const int32_t* gt_set() const + { + if (!gt_set_) { load_gt_set_(); } + return gt_set_; + } + const T* base_set_on_gpu() const; const T* query_set_on_gpu() const; const T* mapped_base_set() const; + auto query_set(MemoryType memory_type) const -> const T* + { + switch (memory_type) { + case MemoryType::Device: return query_set_on_gpu(); + default: return query_set(); + } + } + + auto base_set(MemoryType memory_type) const -> const T* + { + switch (memory_type) { + case MemoryType::Device: return base_set_on_gpu(); + case MemoryType::Host: return base_set(); + case MemoryType::HostMmap: return mapped_base_set(); + default: return nullptr; + } + } + protected: virtual void load_base_set_() const = 0; + virtual void load_gt_set_() const = 0; virtual void load_query_set_() const = 0; virtual void map_base_set_() const = 0; @@ -288,6 +317,7 @@ class Dataset { mutable T* d_base_set_ = nullptr; mutable T* d_query_set_ = nullptr; 
mutable T* mapped_base_set_ = nullptr; + mutable int32_t* gt_set_ = nullptr; }; template @@ -295,6 +325,7 @@ Dataset::~Dataset() { delete[] base_set_; delete[] query_set_; + delete[] gt_set_; #ifndef CPU_ONLY if (d_base_set_) { cudaFree(d_base_set_); } if (d_query_set_) { cudaFree(d_query_set_); } @@ -307,9 +338,8 @@ const T* Dataset::base_set_on_gpu() const #ifndef CPU_ONLY if (!d_base_set_) { base_set(); - RAFT_CUDA_TRY(cudaMalloc((void**)&d_base_set_, base_set_size() * dim() * sizeof(T))); - RAFT_CUDA_TRY(cudaMemcpy( - d_base_set_, base_set_, base_set_size() * dim() * sizeof(T), cudaMemcpyHostToDevice)); + cudaMalloc((void**)&d_base_set_, base_set_size() * dim() * sizeof(T)); + cudaMemcpy(d_base_set_, base_set_, base_set_size() * dim() * sizeof(T), cudaMemcpyHostToDevice); } #endif return d_base_set_; @@ -321,9 +351,9 @@ const T* Dataset::query_set_on_gpu() const #ifndef CPU_ONLY if (!d_query_set_) { query_set(); - RAFT_CUDA_TRY(cudaMalloc((void**)&d_query_set_, query_set_size() * dim() * sizeof(T))); - RAFT_CUDA_TRY(cudaMemcpy( - d_query_set_, query_set_, query_set_size() * dim() * sizeof(T), cudaMemcpyHostToDevice)); + cudaMalloc((void**)&d_query_set_, query_set_size() * dim() * sizeof(T)); + cudaMemcpy( + d_query_set_, query_set_, query_set_size() * dim() * sizeof(T), cudaMemcpyHostToDevice); } #endif return d_query_set_; @@ -344,27 +374,28 @@ class BinDataset : public Dataset { size_t subset_first_row, size_t subset_size, const std::string& query_file, - const std::string& distance); - ~BinDataset() - { - if (this->mapped_base_set_) { base_file_.unmap(); } - } + const std::string& distance, + const std::optional& groundtruth_neighbors_file); int dim() const override; + uint32_t max_k() const override; size_t base_set_size() const override; size_t query_set_size() const override; private: void load_base_set_() const override; void load_query_set_() const override; + void load_gt_set_() const override; void map_base_set_() const override; mutable int dim_ = 0; + mutable uint32_t max_k_ = 0; mutable size_t base_set_size_ = 0; mutable size_t query_set_size_ = 0; BinFile base_file_; BinFile query_file_; + std::optional> gt_file_{std::nullopt}; }; template @@ -373,11 +404,15 @@ BinDataset::BinDataset(const std::string& name, size_t subset_first_row, size_t subset_size, const std::string& query_file, - const std::string& distance) + const std::string& distance, + const std::optional& groundtruth_neighbors_file) : Dataset(name, distance), base_file_(base_file, "r", subset_first_row, subset_size), query_file_(query_file, "r") { + if (groundtruth_neighbors_file.has_value()) { + gt_file_.emplace(groundtruth_neighbors_file.value(), "r"); + } } template @@ -389,6 +424,13 @@ int BinDataset::dim() const return dim_; } +template +uint32_t BinDataset::max_k() const +{ + if (!this->gt_set_) { load_gt_set_(); } + return max_k_; +} + template size_t BinDataset::query_set_size() const { @@ -437,6 +479,19 @@ void BinDataset::load_query_set_() const query_file_.read(this->query_set_); } +template +void BinDataset::load_gt_set_() const +{ + if (gt_file_.has_value()) { + size_t queries; + int k; + gt_file_->get_shape(&queries, &k); + this->gt_set_ = new std::int32_t[queries * k]; + gt_file_->read(this->gt_set_); + max_k_ = k; + } +} + template void BinDataset::map_base_set_() const { diff --git a/cpp/bench/ann/src/common/util.cpp b/cpp/bench/ann/src/common/util.cpp deleted file mode 100644 index 17636f76d7..0000000000 --- a/cpp/bench/ann/src/common/util.cpp +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright 
(c) 2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "util.h" - -#include -#include - -#include -#include - -namespace raft::bench::ann { - -std::vector split(const std::string& s, char delimiter) -{ - std::vector tokens; - std::string token; - std::istringstream iss(s); - while (getline(iss, token, delimiter)) { - if (!token.empty()) { tokens.push_back(token); } - } - return tokens; -} - -bool file_exists(const std::string& filename) -{ - struct stat statbuf; - if (stat(filename.c_str(), &statbuf) != 0) { return false; } - return S_ISREG(statbuf.st_mode); -} - -bool dir_exists(const std::string& dir) -{ - struct stat statbuf; - if (stat(dir.c_str(), &statbuf) != 0) { return false; } - return S_ISDIR(statbuf.st_mode); -} - -bool create_dir(const std::string& dir) -{ - const auto path = split(dir, '/'); - - std::string cwd; - if (!dir.empty() && dir[0] == '/') { cwd += '/'; } - - for (const auto& p : path) { - cwd += p + "/"; - if (!dir_exists(cwd)) { - int ret = mkdir(cwd.c_str(), S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); - if (ret != 0) { return false; } - } - } - return true; -} - -} // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/common/util.h b/cpp/bench/ann/src/common/util.h deleted file mode 100644 index 290bf4cea9..0000000000 --- a/cpp/bench/ann/src/common/util.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include -#include -#include -#include -#include -#include - -namespace raft::bench::ann { - -class Timer { - public: - Timer() { reset(); } - void reset() { start_time_ = std::chrono::steady_clock::now(); } - float elapsed_ms() - { - auto end_time = std::chrono::steady_clock::now(); - auto dur = - std::chrono::duration_cast>(end_time - start_time_); - return dur.count(); - } - - private: - std::chrono::steady_clock::time_point start_time_; -}; - -std::vector split(const std::string& s, char delimiter); - -bool file_exists(const std::string& filename); -bool dir_exists(const std::string& dir); -bool create_dir(const std::string& dir); - -template -void log_(const char* level, Ts... vs) -{ - char buf[20]; - std::time_t now = std::time(nullptr); - std::strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", std::localtime(&now)); - printf("%s [%s] ", buf, level); - printf(vs...); - printf("\n"); - fflush(stdout); -} - -template -void log_info(Ts... 
vs) -{ - log_("info", vs...); -} - -template -void log_warn(Ts... vs) -{ - log_("warn", vs...); -} - -template -void log_error(Ts... vs) -{ - log_("error", vs...); -} - -} // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/common/util.hpp b/cpp/bench/ann/src/common/util.hpp new file mode 100644 index 0000000000..faf440071d --- /dev/null +++ b/cpp/bench/ann/src/common/util.hpp @@ -0,0 +1,347 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "ann_types.hpp" + +#ifdef ANN_BENCH_NVTX3_HEADERS_FOUND +#include +#endif + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace raft::bench::ann { + +template +struct buf { + MemoryType memory_type; + std::size_t size; + T* data; + buf(MemoryType memory_type, std::size_t size) + : memory_type(memory_type), size(size), data(nullptr) + { + switch (memory_type) { +#ifndef CPU_ONLY + case MemoryType::Device: { + cudaMalloc(reinterpret_cast(&data), size * sizeof(T)); + cudaMemset(data, 0, size * sizeof(T)); + } break; +#endif + default: { + data = reinterpret_cast(malloc(size * sizeof(T))); + std::memset(data, 0, size * sizeof(T)); + } + } + } + ~buf() noexcept + { + if (data == nullptr) { return; } + switch (memory_type) { +#ifndef CPU_ONLY + case MemoryType::Device: { + cudaFree(data); + } break; +#endif + default: { + free(data); + } + } + } + + [[nodiscard]] auto move(MemoryType target_memory_type) -> buf + { + buf r{target_memory_type, size}; +#ifndef CPU_ONLY + if ((memory_type == MemoryType::Device && target_memory_type != MemoryType::Device) || + (memory_type != MemoryType::Device && target_memory_type == MemoryType::Device)) { + cudaMemcpy(r.data, data, size * sizeof(T), cudaMemcpyDefault); + return r; + } +#endif + std::swap(data, r.data); + return r; + } +}; + +struct cuda_timer { + private: + cudaStream_t stream_{nullptr}; + cudaEvent_t start_{nullptr}; + cudaEvent_t stop_{nullptr}; + double total_time_{0}; + + public: + struct cuda_lap { + private: + cudaStream_t stream_; + cudaEvent_t start_; + cudaEvent_t stop_; + double& total_time_; + + public: + cuda_lap(cudaStream_t stream, cudaEvent_t start, cudaEvent_t stop, double& total_time) + : start_(start), stop_(stop), stream_(stream), total_time_(total_time) + { +#ifndef CPU_ONLY + cudaStreamSynchronize(stream_); + cudaEventRecord(start_, stream_); +#endif + } + cuda_lap() = delete; + + ~cuda_lap() noexcept + { +#ifndef CPU_ONLY + cudaEventRecord(stop_, stream_); + cudaEventSynchronize(stop_); + float milliseconds = 0.0f; + cudaEventElapsedTime(&milliseconds, start_, stop_); + total_time_ += milliseconds / 1000.0; +#endif + } + }; + + cuda_timer() + { +#ifndef CPU_ONLY + cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking); + cudaEventCreate(&stop_); + cudaEventCreate(&start_); +#endif + } + + ~cuda_timer() noexcept + { +#ifndef CPU_ONLY + cudaEventDestroy(start_); + cudaEventDestroy(stop_); + cudaStreamDestroy(stream_); 
+#endif + } + + [[nodiscard]] auto stream() const -> cudaStream_t { return stream_; } + + [[nodiscard]] auto total_time() const -> double { return total_time_; } + + [[nodiscard]] auto lap() -> cuda_timer::cuda_lap + { + return cuda_lap{stream_, start_, stop_, total_time_}; + } +}; + +inline auto cuda_info() +{ + std::vector> props; +#ifndef CPU_ONLY + int dev, driver = 0, runtime = 0; + cudaDriverGetVersion(&driver); + cudaRuntimeGetVersion(&runtime); + + cudaDeviceProp device_prop; + cudaGetDevice(&dev); + cudaGetDeviceProperties(&device_prop, dev); + props.emplace_back("gpu_name", std::string(device_prop.name)); + props.emplace_back("gpu_sm_count", std::to_string(device_prop.multiProcessorCount)); + props.emplace_back("gpu_sm_freq", std::to_string(device_prop.clockRate * 1e3)); + props.emplace_back("gpu_mem_freq", std::to_string(device_prop.memoryClockRate * 1e3)); + props.emplace_back("gpu_mem_bus_width", std::to_string(device_prop.memoryBusWidth)); + props.emplace_back("gpu_mem_global_size", std::to_string(device_prop.totalGlobalMem)); + props.emplace_back("gpu_mem_shared_size", std::to_string(device_prop.sharedMemPerMultiprocessor)); + props.emplace_back("gpu_driver_version", + std::to_string(driver / 1000) + "." + std::to_string((driver % 100) / 10)); + props.emplace_back("gpu_runtime_version", + std::to_string(runtime / 1000) + "." + std::to_string((runtime % 100) / 10)); +#endif + return props; +} + +struct nvtx_case { +#ifdef ANN_BENCH_NVTX3_HEADERS_FOUND + private: + std::string case_name_; + std::array iter_name_{0}; + nvtxDomainHandle_t domain_; + int64_t iteration_ = 0; + nvtxEventAttributes_t case_attrib_{0}; + nvtxEventAttributes_t iter_attrib_{0}; +#endif + + public: + struct nvtx_lap { +#ifdef ANN_BENCH_NVTX3_HEADERS_FOUND + private: + nvtxDomainHandle_t domain_; + + public: + nvtx_lap(nvtxDomainHandle_t domain, nvtxEventAttributes_t* attr) : domain_(domain) + { + nvtxDomainRangePushEx(domain_, attr); + } + nvtx_lap() = delete; + ~nvtx_lap() noexcept { nvtxDomainRangePop(domain_); } +#endif + }; + +#ifdef ANN_BENCH_NVTX3_HEADERS_FOUND + explicit nvtx_case(std::string case_name) + : case_name_(std::move(case_name)), domain_(nvtxDomainCreateA("ANN benchmark")) + { + case_attrib_.version = NVTX_VERSION; + iter_attrib_.version = NVTX_VERSION; + case_attrib_.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; + iter_attrib_.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; + case_attrib_.colorType = NVTX_COLOR_ARGB; + iter_attrib_.colorType = NVTX_COLOR_ARGB; + case_attrib_.messageType = NVTX_MESSAGE_TYPE_ASCII; + iter_attrib_.messageType = NVTX_MESSAGE_TYPE_ASCII; + case_attrib_.message.ascii = case_name_.c_str(); + auto c = std::hash{}(case_name_); + case_attrib_.color = c | 0xA0A0A0; + nvtxDomainRangePushEx(domain_, &case_attrib_); + } + + ~nvtx_case() + { + nvtxDomainRangePop(domain_); + nvtxDomainDestroy(domain_); + } +#else + explicit nvtx_case(std::string) {} +#endif + + [[nodiscard]] auto lap() -> nvtx_case::nvtx_lap + { +#ifdef ANN_BENCH_NVTX3_HEADERS_FOUND + auto i = iteration_++; + uint32_t c = (i % 5); + uint32_t r = 150 + c * 20; + uint32_t g = 200 + c * 10; + uint32_t b = 220 + c * 5; + std::snprintf(iter_name_.data(), iter_name_.size(), "Lap %zd", i); + iter_attrib_.message.ascii = iter_name_.data(); + iter_attrib_.color = (r << 16) + (g << 8) + b; + return nvtx_lap{domain_, &iter_attrib_}; +#else + return nvtx_lap{}; +#endif + } +}; + +inline std::vector split(const std::string& s, char delimiter) +{ + std::vector tokens; + std::string token; + std::istringstream iss(s); + while 
(getline(iss, token, delimiter)) { + if (!token.empty()) { tokens.push_back(token); } + } + return tokens; +} + +inline bool file_exists(const std::string& filename) +{ + struct stat statbuf; + if (stat(filename.c_str(), &statbuf) != 0) { return false; } + return S_ISREG(statbuf.st_mode); +} + +inline bool dir_exists(const std::string& dir) +{ + struct stat statbuf; + if (stat(dir.c_str(), &statbuf) != 0) { return false; } + return S_ISDIR(statbuf.st_mode); +} + +inline bool create_dir(const std::string& dir) +{ + const auto path = split(dir, '/'); + + std::string cwd; + if (!dir.empty() && dir[0] == '/') { cwd += '/'; } + + for (const auto& p : path) { + cwd += p + "/"; + if (!dir_exists(cwd)) { + int ret = mkdir(cwd.c_str(), S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); + if (ret != 0) { return false; } + } + } + return true; +} + +inline void make_sure_parent_dir_exists(const std::string& file_path) +{ + const auto pos = file_path.rfind('/'); + if (pos != std::string::npos) { + auto dir = file_path.substr(0, pos); + if (!dir_exists(dir)) { create_dir(dir); } + } +} + +inline auto combine_path(const std::string& dir, const std::string& path) +{ + std::filesystem::path p_dir(dir); + std::filesystem::path p_suf(path); + return (p_dir / p_suf).string(); +} + +template +void log_(const char* level, const Ts&... vs) +{ + char buf[20]; + std::time_t now = std::time(nullptr); + std::strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", std::localtime(&now)); + printf("%s [%s] ", buf, level); + if constexpr (sizeof...(Ts) == 1) { + printf("%s", vs...); + } else { + printf(vs...); + } + printf("\n"); + fflush(stdout); +} + +template +void log_info(Ts&&... vs) +{ + log_("info", std::forward(vs)...); +} + +template +void log_warn(Ts&&... vs) +{ + log_("warn", std::forward(vs)...); +} + +template +void log_error(Ts&&... 
vs) +{ + log_("error", std::forward(vs)...); +} + +} // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/faiss/faiss_benchmark.cu b/cpp/bench/ann/src/faiss/faiss_benchmark.cu index a01702fb1f..619565d107 100644 --- a/cpp/bench/ann/src/faiss/faiss_benchmark.cu +++ b/cpp/bench/ann/src/faiss/faiss_benchmark.cu @@ -97,10 +97,8 @@ template std::unique_ptr> create_algo(const std::string& algo, const std::string& distance, int dim, - float refine_ratio, const nlohmann::json& conf, - const std::vector& dev_list, - const nlohmann::json& index_conf) + const std::vector& dev_list) { // stop compiler warning; not all algorithms support multi-GPU so it may not be used (void)dev_list; @@ -124,7 +122,6 @@ std::unique_ptr> create_algo(const std::string& algo, if (!ann) { throw std::runtime_error("invalid algo: '" + algo + "'"); } - if (refine_ratio > 1.0) {} return ann; } @@ -146,6 +143,11 @@ std::unique_ptr::AnnSearchParam> create_search } // namespace raft::bench::ann -#include "../common/benchmark.hpp" +REGISTER_ALGO_INSTANCE(float); +REGISTER_ALGO_INSTANCE(std::int8_t); +REGISTER_ALGO_INSTANCE(std::uint8_t); +#ifdef ANN_BENCH_BUILD_MAIN +#include "../common/benchmark.hpp" int main(int argc, char** argv) { return raft::bench::ann::run_main(argc, argv); } +#endif diff --git a/cpp/bench/ann/src/faiss/faiss_wrapper.h b/cpp/bench/ann/src/faiss/faiss_wrapper.h index 8cfc26ea5b..0dc06f99a5 100644 --- a/cpp/bench/ann/src/faiss/faiss_wrapper.h +++ b/cpp/bench/ann/src/faiss/faiss_wrapper.h @@ -16,6 +16,10 @@ #ifndef FAISS_WRAPPER_H_ #define FAISS_WRAPPER_H_ +#include "../common/ann_types.hpp" + +#include + #include #include #include @@ -35,10 +39,6 @@ #include #include -#include "../common/ann_types.hpp" -#include "../common/benchmark_util.hpp" -#include - namespace { faiss::MetricType parse_metric_type(raft::bench::ann::Metric metric) @@ -98,13 +98,12 @@ class FaissGpu : public ANN { float* distances, cudaStream_t stream = 0) const final; - AlgoProperty get_property() const override + AlgoProperty get_preference() const override { AlgoProperty property; // to enable building big dataset which is larger than GPU memory - property.dataset_memory_type = MemoryType::Host; - property.query_memory_type = MemoryType::Device; - property.need_dataset_when_search = false; + property.dataset_memory_type = MemoryType::Host; + property.query_memory_type = MemoryType::Device; return property; } diff --git a/cpp/bench/ann/src/ggnn/ggnn_benchmark.cu b/cpp/bench/ann/src/ggnn/ggnn_benchmark.cu index 63ceb5d771..99481c2921 100644 --- a/cpp/bench/ann/src/ggnn/ggnn_benchmark.cu +++ b/cpp/bench/ann/src/ggnn/ggnn_benchmark.cu @@ -84,10 +84,8 @@ template std::unique_ptr> create_algo(const std::string& algo, const std::string& distance, int dim, - float refine_ratio, const nlohmann::json& conf, - const std::vector& dev_list, - const nlohmann::json& index_conf) + const std::vector& dev_list) { // stop compiler warning; not all algorithms support multi-GPU so it may not be used (void)dev_list; @@ -102,7 +100,6 @@ std::unique_ptr> create_algo(const std::string& algo, if (algo == "ggnn") { ann = make_algo(metric, dim, conf); } if (!ann) { throw std::runtime_error("invalid algo: '" + algo + "'"); } - if (refine_ratio > 1.0) {} return ann; } @@ -121,6 +118,11 @@ std::unique_ptr::AnnSearchParam> create_search } // namespace raft::bench::ann -#include "../common/benchmark.hpp" +REGISTER_ALGO_INSTANCE(float); +REGISTER_ALGO_INSTANCE(std::int8_t); +REGISTER_ALGO_INSTANCE(std::uint8_t); -int main(int argc, char** argv) { return 
raft::bench::ann::run_main(argc, argv); } \ No newline at end of file +#ifdef ANN_BENCH_BUILD_MAIN +#include "../common/benchmark.hpp" +int main(int argc, char** argv) { return raft::bench::ann::run_main(argc, argv); } +#endif diff --git a/cpp/bench/ann/src/ggnn/ggnn_wrapper.cuh b/cpp/bench/ann/src/ggnn/ggnn_wrapper.cuh index fd8fe0f2ec..74c7cddc3c 100644 --- a/cpp/bench/ann/src/ggnn/ggnn_wrapper.cuh +++ b/cpp/bench/ann/src/ggnn/ggnn_wrapper.cuh @@ -16,14 +16,14 @@ #pragma once -#include -#include - #include "../common/ann_types.hpp" -#include "../common/benchmark_util.hpp" + #include #include +#include +#include + namespace raft::bench::ann { template @@ -50,6 +50,7 @@ class Ggnn : public ANN { int max_iterations{400}; int cache_size{512}; int sorted_size{256}; + auto needs_dataset() const -> bool override { return true; } }; Ggnn(Metric metric, int dim, const BuildParam& param); @@ -74,7 +75,7 @@ class Ggnn : public ANN { void save(const std::string& file) const override { impl_->save(file); } void load(const std::string& file) override { impl_->load(file); } - AlgoProperty get_property() const override { return impl_->get_property(); } + AlgoProperty get_preference() const override { return impl_->get_preference(); } void set_search_dataset(const T* dataset, size_t nrow) override { @@ -135,12 +136,11 @@ class GgnnImpl : public ANN { void save(const std::string& file) const override; void load(const std::string& file) override; - AlgoProperty get_property() const override + AlgoProperty get_preference() const override { AlgoProperty property; - property.dataset_memory_type = MemoryType::Device; - property.query_memory_type = MemoryType::Device; - property.need_dataset_when_search = true; + property.dataset_memory_type = MemoryType::Device; + property.query_memory_type = MemoryType::Device; return property; } diff --git a/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp b/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp index 885d0e3ba4..be5b72c5f6 100644 --- a/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp +++ b/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp @@ -14,6 +14,8 @@ * limitations under the License. 
*/ +#include "../common/ann_types.hpp" + #include #include #include @@ -22,9 +24,6 @@ #include #include -#include "../common/benchmark_util.hpp" - -#include "../common/ann_types.hpp" #undef WARP_SIZE #include "hnswlib_wrapper.h" #define JSON_DIAGNOSTICS 1 @@ -76,10 +75,8 @@ template std::unique_ptr> create_algo(const std::string& algo, const std::string& distance, int dim, - float refine_ratio, const nlohmann::json& conf, - const std::vector& dev_list, - const nlohmann::json& index_conf) + const std::vector& dev_list) { // stop compiler warning; not all algorithms support multi-GPU so it may not be used (void)dev_list; @@ -96,8 +93,6 @@ std::unique_ptr> create_algo(const std::string& algo, } if (!ann) { throw std::runtime_error("invalid algo: '" + algo + "'"); } - - if (refine_ratio > 1.0) {} return ann; } @@ -116,6 +111,12 @@ std::unique_ptr::AnnSearchParam> create_search }; // namespace raft::bench::ann -#include "../common/benchmark.hpp" +REGISTER_ALGO_INSTANCE(float); +REGISTER_ALGO_INSTANCE(std::int8_t); +REGISTER_ALGO_INSTANCE(std::uint8_t); -int main(int argc, char** argv) { return raft::bench::ann::run_main(argc, argv); } \ No newline at end of file +#ifdef ANN_BENCH_BUILD_MAIN +#define CPU_ONLY +#include "../common/benchmark.hpp" +int main(int argc, char** argv) { return raft::bench::ann::run_main(argc, argv); } +#endif diff --git a/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h b/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h index c5c3a4a2a6..5cd33ef94d 100644 --- a/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h +++ b/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h @@ -188,12 +188,11 @@ class HnswLib : public ANN { void save(const std::string& path_to_index) const override; void load(const std::string& path_to_index) override; - AlgoProperty get_property() const override + AlgoProperty get_preference() const override { AlgoProperty property; - property.dataset_memory_type = MemoryType::Host; - property.query_memory_type = MemoryType::Host; - property.need_dataset_when_search = false; + property.dataset_memory_type = MemoryType::Host; + property.query_memory_type = MemoryType::Host; return property; } diff --git a/cpp/bench/ann/src/raft/raft_benchmark.cu b/cpp/bench/ann/src/raft/raft_benchmark.cu index 24f0df4b47..823fa3f2f3 100644 --- a/cpp/bench/ann/src/raft/raft_benchmark.cu +++ b/cpp/bench/ann/src/raft/raft_benchmark.cu @@ -14,6 +14,8 @@ * limitations under the License. 
*/ +#include "../common/ann_types.hpp" + #include #include #include @@ -22,8 +24,6 @@ #include #include -#include "../common/ann_types.hpp" -#include "../common/benchmark_util.hpp" #undef WARP_SIZE #ifdef RAFT_ANN_BENCH_USE_RAFT_BFKNN #include "raft_wrapper.h" @@ -120,6 +120,10 @@ void parse_search_param(const nlohmann::json& conf, // set half as default param.pq_param.lut_dtype = CUDA_R_16F; } + if (conf.contains("refine_ratio")) { + param.refine_ratio = conf.at("refine_ratio"); + if (param.refine_ratio < 1.0f) { throw std::runtime_error("refine_ratio should be >= 1.0"); } + } } #endif @@ -165,31 +169,18 @@ template std::unique_ptr> create_algo(const std::string& algo, const std::string& distance, int dim, - float refine_ratio, const nlohmann::json& conf, - const std::vector& dev_list, - const nlohmann::json& index_conf) + const std::vector& dev_list) { // stop compiler warning; not all algorithms support multi-GPU so it may not be used (void)dev_list; raft::bench::ann::Metric metric = parse_metric(distance); - std::string memtype = conf.at("dataset_memtype"); - - MemoryType dataset_memorytype = MemoryType::Device; - if (memtype == "host") { - dataset_memorytype = MemoryType::Host; - } else if (memtype == "mmap") { - dataset_memorytype = MemoryType::HostMmap; - } - std::unique_ptr> ann; if constexpr (std::is_same_v) { #ifdef RAFT_ANN_BENCH_USE_RAFT_BFKNN - if (algo == "raft_bfknn") { - ann = std::make_unique>(metric, dim, dataset_memorytype); - } + if (algo == "raft_bfknn") { ann = std::make_unique>(metric, dim); } #endif } @@ -199,29 +190,25 @@ std::unique_ptr> create_algo(const std::string& algo, if (algo == "raft_ivf_flat") { typename raft::bench::ann::RaftIvfFlatGpu::BuildParam param; parse_build_param(conf, param); - ann = std::make_unique>( - metric, dim, param, dataset_memorytype); + ann = std::make_unique>(metric, dim, param); } #endif #ifdef RAFT_ANN_BENCH_USE_RAFT_IVF_PQ if (algo == "raft_ivf_pq") { typename raft::bench::ann::RaftIvfPQ::BuildParam param; parse_build_param(conf, param); - ann = std::make_unique>( - metric, dim, param, refine_ratio, dataset_memorytype); + ann = std::make_unique>(metric, dim, param); } #endif #ifdef RAFT_ANN_BENCH_USE_RAFT_CAGRA if (algo == "raft_cagra") { typename raft::bench::ann::RaftCagra::BuildParam param; parse_build_param(conf, param); - ann = std::make_unique>( - metric, dim, param, dataset_memorytype); + ann = std::make_unique>(metric, dim, param); } #endif if (!ann) { throw std::runtime_error("invalid algo: '" + algo + "'"); } - if (refine_ratio > 1.0) {} return ann; } @@ -263,6 +250,11 @@ std::unique_ptr::AnnSearchParam> create_search }; // namespace raft::bench::ann -#include "../common/benchmark.hpp" +REGISTER_ALGO_INSTANCE(float); +REGISTER_ALGO_INSTANCE(std::int8_t); +REGISTER_ALGO_INSTANCE(std::uint8_t); +#ifdef ANN_BENCH_BUILD_MAIN +#include "../common/benchmark.hpp" int main(int argc, char** argv) { return raft::bench::ann::run_main(argc, argv); } +#endif diff --git a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h index c8bfe9b401..02aa2ea28b 100644 --- a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h +++ b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -48,11 +49,23 @@ class RaftCagra : public ANN { struct SearchParam : public AnnSearchParam { raft::neighbors::experimental::cagra::search_params p; + auto needs_dataset() const -> bool override { return true; } }; using BuildParam = 
raft::neighbors::cagra::index_params; - RaftCagra(Metric metric, int dim, const BuildParam& param, MemoryType dataset_memtype); + RaftCagra(Metric metric, int dim, const BuildParam& param) + : ANN(metric, dim), + index_params_(param), + dimension_(dim), + mr_(rmm::mr::get_current_device_resource(), 1024 * 1024 * 1024ull) + { + rmm::mr::set_current_device_resource(&mr_); + index_params_.metric = parse_metric_type(metric); + RAFT_CUDA_TRY(cudaGetDevice(&device_)); + } + + ~RaftCagra() noexcept { rmm::mr::set_current_device_resource(mr_.get_upstream()); } void build(const T* dataset, size_t nrow, cudaStream_t stream) final; @@ -70,57 +83,43 @@ class RaftCagra : public ANN { cudaStream_t stream = 0) const override; // to enable dataset access from GPU memory - AlgoProperty get_property() const override + AlgoProperty get_preference() const override { AlgoProperty property; - property.dataset_memory_type = dataset_memtype_; - property.query_memory_type = MemoryType::Device; - property.need_dataset_when_search = true; + property.dataset_memory_type = MemoryType::HostMmap; + property.query_memory_type = MemoryType::Device; return property; } void save(const std::string& file) const override; void load(const std::string&) override; - ~RaftCagra() noexcept { rmm::mr::set_current_device_resource(mr_.get_upstream()); } - private: + // `mr_` must go first to make sure it dies last + rmm::mr::pool_memory_resource mr_; raft::device_resources handle_; BuildParam index_params_; raft::neighbors::cagra::search_params search_params_; std::optional> index_; int device_; int dimension_; - MemoryType dataset_memtype_; - rmm::mr::pool_memory_resource mr_; }; -template -RaftCagra::RaftCagra(Metric metric, - int dim, - const BuildParam& param, - MemoryType dataset_memtype) - : ANN(metric, dim), - index_params_(param), - dimension_(dim), - dataset_memtype_(dataset_memtype), - mr_(rmm::mr::get_current_device_resource(), 1024 * 1024 * 1024ull) -{ - rmm::mr::set_current_device_resource(&mr_); - index_params_.metric = parse_metric_type(metric); - RAFT_CUDA_TRY(cudaGetDevice(&device_)); -} - template void RaftCagra::build(const T* dataset, size_t nrow, cudaStream_t) { - if (get_property().dataset_memory_type != MemoryType::Device) { - auto dataset_view = - raft::make_host_matrix_view(dataset, IdxT(nrow), dimension_); - index_.emplace(raft::neighbors::cagra::build(handle_, index_params_, dataset_view)); - } else { - auto dataset_view = - raft::make_device_matrix_view(dataset, IdxT(nrow), dimension_); - index_.emplace(raft::neighbors::cagra::build(handle_, index_params_, dataset_view)); + switch (raft::spatial::knn::detail::utils::check_pointer_residency(dataset)) { + case raft::spatial::knn::detail::utils::pointer_residency::host_only: { + auto dataset_view = + raft::make_host_matrix_view(dataset, IdxT(nrow), dimension_); + index_.emplace(raft::neighbors::cagra::build(handle_, index_params_, dataset_view)); + return; + } + default: { + auto dataset_view = + raft::make_device_matrix_view(dataset, IdxT(nrow), dimension_); + index_.emplace(raft::neighbors::cagra::build(handle_, index_params_, dataset_view)); + return; + } } } diff --git a/cpp/bench/ann/src/raft/raft_ivf_flat_wrapper.h b/cpp/bench/ann/src/raft/raft_ivf_flat_wrapper.h index 69057c7a4f..da457e32f1 100644 --- a/cpp/bench/ann/src/raft/raft_ivf_flat_wrapper.h +++ b/cpp/bench/ann/src/raft/raft_ivf_flat_wrapper.h @@ -52,7 +52,19 @@ class RaftIvfFlatGpu : public ANN { using BuildParam = raft::neighbors::ivf_flat::index_params; - RaftIvfFlatGpu(Metric metric, int 
dim, const BuildParam& param, MemoryType dataset_memtype); + RaftIvfFlatGpu(Metric metric, int dim, const BuildParam& param) + : ANN(metric, dim), + index_params_(param), + dimension_(dim), + mr_(rmm::mr::get_current_device_resource(), 1024 * 1024 * 1024ull) + { + index_params_.metric = parse_metric_type(metric); + index_params_.conservative_memory_allocation = true; + rmm::mr::set_current_device_resource(&mr_); + RAFT_CUDA_TRY(cudaGetDevice(&device_)); + } + + ~RaftIvfFlatGpu() noexcept { rmm::mr::set_current_device_resource(mr_.get_upstream()); } void build(const T* dataset, size_t nrow, cudaStream_t stream) final; @@ -68,47 +80,27 @@ class RaftIvfFlatGpu : public ANN { cudaStream_t stream = 0) const override; // to enable dataset access from GPU memory - AlgoProperty get_property() const override + AlgoProperty get_preference() const override { AlgoProperty property; - property.dataset_memory_type = dataset_memtype_; - property.query_memory_type = MemoryType::Device; - property.need_dataset_when_search = false; + property.dataset_memory_type = MemoryType::Device; + property.query_memory_type = MemoryType::Device; return property; } void save(const std::string& file) const override; void load(const std::string&) override; - ~RaftIvfFlatGpu() noexcept { rmm::mr::set_current_device_resource(mr_.get_upstream()); } - private: + // `mr_` must go first to make sure it dies last + rmm::mr::pool_memory_resource mr_; raft::device_resources handle_; BuildParam index_params_; raft::neighbors::ivf_flat::search_params search_params_; std::optional> index_; int device_; int dimension_; - MemoryType dataset_memtype_; - rmm::mr::pool_memory_resource mr_; }; -template -RaftIvfFlatGpu::RaftIvfFlatGpu(Metric metric, - int dim, - const BuildParam& param, - MemoryType dataset_memtype) - : ANN(metric, dim), - index_params_(param), - dimension_(dim), - dataset_memtype_(dataset_memtype), - mr_(rmm::mr::get_current_device_resource(), 1024 * 1024 * 1024ull) -{ - index_params_.metric = parse_metric_type(metric); - index_params_.conservative_memory_allocation = true; - rmm::mr::set_current_device_resource(&mr_); - RAFT_CUDA_TRY(cudaGetDevice(&device_)); -} - template void RaftIvfFlatGpu::build(const T* dataset, size_t nrow, cudaStream_t) { diff --git a/cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h b/cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h index d03912afc2..0d4bca75cc 100644 --- a/cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h +++ b/cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -47,15 +48,24 @@ class RaftIvfPQ : public ANN { struct SearchParam : public AnnSearchParam { raft::neighbors::ivf_pq::search_params pq_param; + float refine_ratio = 1.0f; + auto needs_dataset() const -> bool override { return refine_ratio > 1.0f; } }; using BuildParam = raft::neighbors::ivf_pq::index_params; - RaftIvfPQ(Metric metric, - int dim, - const BuildParam& param, - float refine_ratio, - MemoryType dataset_memtype); + RaftIvfPQ(Metric metric, int dim, const BuildParam& param) + : ANN(metric, dim), + index_params_(param), + dimension_(dim), + mr_(rmm::mr::get_current_device_resource(), 1024 * 1024 * 1024ull) + { + rmm::mr::set_current_device_resource(&mr_); + index_params_.metric = parse_metric_type(metric); + RAFT_CUDA_TRY(cudaGetDevice(&device_)); + } + + ~RaftIvfPQ() noexcept { rmm::mr::set_current_device_resource(mr_.get_upstream()); } void build(const T* dataset, size_t nrow, cudaStream_t stream) final; @@ -72,20 +82,19 @@ class RaftIvfPQ : 
public ANN { cudaStream_t stream = 0) const override; // to enable dataset access from GPU memory - AlgoProperty get_property() const override + AlgoProperty get_preference() const override { AlgoProperty property; - property.dataset_memory_type = dataset_memtype_; - property.query_memory_type = MemoryType::Device; - property.need_dataset_when_search = refine_ratio_ > 1.0; + property.dataset_memory_type = MemoryType::Host; + property.query_memory_type = MemoryType::Device; return property; } void save(const std::string& file) const override; void load(const std::string&) override; - ~RaftIvfPQ() noexcept { rmm::mr::set_current_device_resource(mr_.get_upstream()); } - private: + // `mr_` must go first to make sure it dies last + rmm::mr::pool_memory_resource mr_; raft::device_resources handle_; BuildParam index_params_; raft::neighbors::ivf_pq::search_params search_params_; @@ -93,24 +102,8 @@ class RaftIvfPQ : public ANN { int device_; int dimension_; float refine_ratio_ = 1.0; - MemoryType dataset_memtype_; - rmm::mr::pool_memory_resource mr_; raft::device_matrix_view dataset_; }; -template -RaftIvfPQ::RaftIvfPQ( - Metric metric, int dim, const BuildParam& param, float refine_ratio, MemoryType dataset_memtype) - : ANN(metric, dim), - index_params_(param), - dimension_(dim), - refine_ratio_(refine_ratio), - dataset_memtype_(dataset_memtype), - mr_(rmm::mr::get_current_device_resource(), 1024 * 1024 * 1024ull) -{ - rmm::mr::set_current_device_resource(&mr_); - index_params_.metric = parse_metric_type(metric); - RAFT_CUDA_TRY(cudaGetDevice(&device_)); -} template void RaftIvfPQ::save(const std::string& file) const @@ -141,6 +134,7 @@ void RaftIvfPQ::set_search_param(const AnnSearchParam& param) { auto search_param = dynamic_cast(param); search_params_ = search_param.pq_param; + refine_ratio_ = search_param.refine_ratio; assert(search_params_.n_probes <= index_params_.n_lists); } @@ -168,7 +162,8 @@ void RaftIvfPQ::search(const T* queries, raft::runtime::neighbors::ivf_pq::search( handle_, search_params_, *index_, queries_v, candidates.view(), distances_tmp.view()); - if (get_property().dataset_memory_type == MemoryType::Device) { + if (raft::spatial::knn::detail::utils::check_pointer_residency(dataset_.data_handle()) == + raft::spatial::knn::detail::utils::pointer_residency::device_only) { auto queries_v = raft::make_device_matrix_view(queries, batch_size, index_->dim()); auto neighbors_v = raft::make_device_matrix_view((IdxT*)neighbors, batch_size, k); diff --git a/cpp/bench/ann/src/raft/raft_wrapper.h b/cpp/bench/ann/src/raft/raft_wrapper.h index 01f206ab70..c8d98460b7 100644 --- a/cpp/bench/ann/src/raft/raft_wrapper.h +++ b/cpp/bench/ann/src/raft/raft_wrapper.h @@ -65,12 +65,11 @@ class RaftGpu : public ANN { cudaStream_t stream = 0) const final; // to enable dataset access from GPU memory - AlgoProperty get_property() const override + AlgoProperty get_preference() const override { AlgoProperty property; - property.dataset_memory_type = MemoryType::Device; - property.query_memory_type = MemoryType::Device; - property.need_dataset_when_search = true; + property.dataset_memory_type = MemoryType::Device; + property.query_memory_type = MemoryType::Device; return property; } void set_search_dataset(const T* dataset, size_t nrow) override; diff --git a/dependencies.yaml b/dependencies.yaml index cf8170b9a1..d90a6e6e64 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -98,6 +98,20 @@ files: key: test includes: - test_python_common + py_build_raft_ann_bench: + output: pyproject + 
pyproject_dir: python/raft-ann-bench + extras: + table: build-system + includes: + - build_wheels + py_run_raft_ann_bench: + output: pyproject + pyproject_dir: python/raft-ann-bench + extras: + table: project + includes: + - nn_bench_python channels: - rapidsai - rapidsai-nightly @@ -161,7 +175,7 @@ dependencies: - clang-tools=16.0.1 nn_bench: common: - - output_types: [conda] + - output_types: [conda, pyproject, requirements] packages: - hnswlib=0.7.0 - nlohmann_json>=3.11.2 @@ -171,6 +185,12 @@ dependencies: - faiss-proc=*=cuda - matplotlib - pyyaml + nn_bench_python: + common: + - output_types: [conda] + packages: + - matplotlib + - pyyaml cudatoolkit: specific: @@ -305,16 +325,16 @@ dependencies: common: - output_types: [conda, pyproject] packages: - - dask>=2023.7.1 + - dask==2023.7.1 - dask-cuda==23.10.* - - distributed>=2023.7.1 + - distributed==2023.7.1 - joblib>=0.11 - numba>=0.57 - *numpy - ucx-py==0.34.* - output_types: conda packages: - - dask-core>=2023.7.1 + - dask-core==2023.7.1 - ucx>=1.13.0 - ucx-proc=*=gpu - output_types: pyproject diff --git a/docs/source/ann_benchmarks_low_level.md b/docs/source/ann_benchmarks_low_level.md index f95d01f66f..d08a3a1791 100644 --- a/docs/source/ann_benchmarks_low_level.md +++ b/docs/source/ann_benchmarks_low_level.md @@ -21,126 +21,203 @@ mv glove-100-angular.groundtruth.distances.fbin glove-100-inner/groundtruth.dist popd # (2) build index -./cpp/build/RAFT_IVF_FLAT_ANN_BENCH -b -i raft_ivf_flat.nlist1024 conf/glove-100-inner.json +./cpp/build/RAFT_IVF_FLAT_ANN_BENCH \ + --data_prefix=cpp/bench/ann/data \ + --build \ + --benchmark_filter="raft_ivf_flat\..*" \ + cpp/bench/ann/conf/glove-100-inner.json # (3) search -./cpp/build/RAFT_IVF_FLAT_ANN_BENCH -s -i raft_ivf_flat.nlist1024 conf/glove-100-inner.json - -# (4) evaluate result -pushd -cd cpp/bench/ann -./scripts/eval.pl \ - -o result.csv \ - data/glove-100-inner/groundtruth.neighbors.ibin \ - result/glove-100-inner/faiss_ivf_flat -popd - -# optional step: plot QPS-Recall figure using data in result.csv with your favorite tool +./cpp/build/RAFT_IVF_FLAT_ANN_BENCH \ + --data_prefix=cpp/bench/ann/data \ + --benchmark_min_time=2s \ + --benchmark_out=ivf_flat_search.csv \ + --benchmark_out_format=csv \ + --benchmark_counters_tabular \ + --search \ + --benchmark_filter="raft_ivf_flat\..*" + cpp/bench/ann/conf/glove-100-inner.json + +# optional step: plot QPS-Recall figure using data in ivf_flat_search.csv with your favorite tool ``` -##### Step 1: Prepare Dataset -[Instructions](ann_benchmarks_dataset.md) +##### Step 1: Prepare Dataset +A dataset usually has 4 binary files containing database vectors, query vectors, ground truth neighbors and their corresponding distances. For example, Glove-100 dataset has files `base.fbin` (database vectors), `query.fbin` (query vectors), `groundtruth.neighbors.ibin` (ground truth neighbors), and `groundtruth.distances.fbin` (ground truth distances). The first two files are for index building and searching, while the other two are associated with a particular distance and are used for evaluation. + +The file suffixes `.fbin`, `.f16bin`, `.ibin`, `.u8bin`, and `.i8bin` denote that the data type of vectors stored in the file are `float32`, `float16`(a.k.a `half`), `int`, `uint8`, and `int8`, respectively. +These binary files are little-endian and the format is: the first 8 bytes are `num_vectors` (`uint32_t`) and `num_dimensions` (`uint32_t`), and the following `num_vectors * num_dimensions * sizeof(type)` bytes are vectors stored in row-major order. 
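+
+For illustration only (this snippet is not part of the repository scripts), here is a minimal Python sketch of how a file in this format can be read; it assumes NumPy is installed, and the file paths below are hypothetical examples:
+```python
+import numpy as np
+
+def read_bin(path, dtype):
+    # First 8 bytes: number of vectors and number of dimensions (both uint32, little-endian).
+    nrows, ndims = np.fromfile(path, dtype="<u4", count=2)
+    # The rest of the file: nrows * ndims values of `dtype`, stored in row-major order.
+    data = np.fromfile(path, dtype=dtype, offset=8, count=int(nrows) * int(ndims))
+    return data.reshape(int(nrows), int(ndims))
+
+base = read_bin("data/glove-100-inner/base.fbin", "<f4")                        # float32 vectors
+neighbors = read_bin("data/glove-100-inner/groundtruth.neighbors.ibin", "<i4")  # int32 neighbor ids
+print(base.shape, neighbors.shape)
+```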
+
+Some implementations can take `float16` database and query vectors as inputs and will have better performance. Use `script/fbin_to_f16bin.py` to transform a dataset from `float32` to `float16` type.
+
+Commonly used datasets can be downloaded from two websites:
+1. Million-scale datasets can be found at the [Data sets](https://github.com/erikbern/ann-benchmarks#data-sets) section of [`ann-benchmarks`](https://github.com/erikbern/ann-benchmarks).
+
+   However, these datasets are in HDF5 format. Use `cpp/bench/ann/scripts/hdf5_to_fbin.py` to transform the format. A few Python packages are required to run it:
+   ```bash
+   pip3 install numpy h5py
+   ```
+   The usage of this script is:
+   ```bash
+   $ cpp/bench/ann/scripts/hdf5_to_fbin.py
+   usage: scripts/hdf5_to_fbin.py [-n] <input>.hdf5
+     -n: normalize base/query set
+   outputs: <input>.base.fbin
+            <input>.query.fbin
+            <input>.groundtruth.neighbors.ibin
+            <input>.groundtruth.distances.fbin
+   ```
+   So for an input `.hdf5` file, four output binary files will be produced. See the previous section for an example of preprocessing the GloVe dataset.
+
+   Most datasets provided by `ann-benchmarks` use `Angular` or `Euclidean` distance. `Angular` denotes cosine distance. However, computing cosine distance reduces to computing inner product by normalizing vectors beforehand. In practice, we can always do the normalization to decrease computation cost, so it's better to measure the performance of inner product rather than cosine distance. The `-n` option of `hdf5_to_fbin.py` can be used to normalize the dataset.
+
+2. Billion-scale datasets can be found at [`big-ann-benchmarks`](http://big-ann-benchmarks.com). The ground truth file contains both neighbors and distances, thus should be split. A script is provided for this:
+   ```bash
+   $ cpp/bench/ann/scripts/split_groundtruth.pl
+   usage: script/split_groundtruth.pl input output_prefix
+   ```
+   Take the Deep-1B dataset as an example:
+   ```bash
+   pushd
+   cd cpp/bench/ann
+   mkdir -p data/deep-1B && cd data/deep-1B
+   # download manually "Ground Truth" file of "Yandex DEEP"
+   # suppose the file name is deep_new_groundtruth.public.10K.bin
+   ../../scripts/split_groundtruth.pl deep_new_groundtruth.public.10K.bin groundtruth
+   # two files 'groundtruth.neighbors.ibin' and 'groundtruth.distances.fbin' should be produced
+   popd
+   ```
+   Besides ground truth files for the whole billion-scale datasets, this site also provides ground truth files for the first 10M or 100M vectors of the base sets. This means we can use these billion-scale datasets as million-scale datasets. To facilitate this, an optional parameter `subset_size` for the dataset can be used. See the next step for further explanation.

 ##### Step 2: Build Index
-An index is a data structure to facilitate searching. Different algorithms may use different data structures for their index. We can use `RAFT_IVF_FLAT_ANN_BENCH -b` to build an index and save it to disk.
+An index is a data structure to facilitate searching. Different algorithms may use different data structures for their index. We can use `RAFT_IVF_FLAT_ANN_BENCH --build` to build an index and save it to disk.

 To run a benchmark executable, like `RAFT_IVF_FLAT_ANN_BENCH`, a JSON configuration file is required. Refer to [`cpp/bench/ann/conf/glove-100-inner.json`](../../cpp/cpp/bench/ann/conf/glove-100-inner.json) as an example.

 Configuration file has 3 sections:
 * `dataset` section specifies the name and files of a dataset, and also the distance in use.
Since the `*_ANN_BENCH` programs are for index building and searching, only `base_file` for database vectors and `query_file` for query vectors are needed. Ground truth files are for evaluation thus not needed. - To use only a subset of the base dataset, an optional parameter `subset_size` can be specified. It means using only the first `subset_size` vectors of `base_file` as the base dataset. * `search_basic_param` section specifies basic parameters for searching: - `k` is the "k" in "k-nn", that is, the number of neighbors (or results) we want from the searching. - - `run_count` means how many times we run the searching. A single run of searching will search neighbors for all vectors in `test` set. The total time used for a run is recorded, and the final searching time is the smallest one among these runs. * `index` section specifies an array of configurations for index building and searching: - `build_param` and `search_params` are parameters for building and searching, respectively. `search_params` is an array since we will search with different parameters to get different recall values. - `file` is the file name of index. Building will save built index to this file, while searching will load this file. - - `search_result_file` is the file name prefix of searching results. Searching will save results to these files, and plotting script will read these files to plot results. Note this is a prefix rather than a whole file name. Suppose its value is `${prefix}`, then the real file names are like `${prefix}.0.{ibin|txt}`, `${prefix}.1.{ibin|txt}`, etc. Each of them corresponds to an item in `search_params` array. That is, for one searching parameter, there will be some corresponding search result files. - - if `multigpu` is specified, multiple GPUs will be used for index build and search. - if `refine_ratio` is specified, refinement, as a post-processing step of search, will be done. It's for algorithms that compress vectors. For example, if `"refine_ratio" : 2` is set, 2`k` results are first computed, then exact distances of them are computed using original uncompressed vectors, and finally top `k` results among them are kept. -The usage of `*_ANN_BENCH` can be found by running `*_ANN_BENCH -h` on one of the executables: +The usage of `*_ANN_BENCH` can be found by running `*_ANN_BENCH --help` on one of the executables: ```bash -$ ./cpp/build/*_ANN_BENCH -h -usage: ./cpp/build/*_ANN_BENCH -b|s [-f] [-i index_names] conf.json - -b: build mode, will build index - -s: search mode, will search using built index - one and only one of -b and -s should be specified - -f: force overwriting existing output files - -i: by default will build/search all the indices found in conf.json - '-i' can be used to select a subset of indices - 'index_names' is a list of comma-separated index names - '*' is allowed as the last character of a name to select all matched indices - for example, -i "hnsw1,hnsw2,faiss" or -i "hnsw*,faiss" -``` -* `-b`: build index. -* `-s`: do the searching with built index. -* `-f`: before doing the real task, the program checks that needed input files exist and output files don't exist. If these conditions are not met, it quits so no file would be overwritten accidentally. To ignore existing output files and force overwrite them, use the `-f` option. -* `-i`: by default, the `-b` flag will build all indices found in the configuration file, and `-s` will search using all the indices. To select a subset of indices to build or search, we can use the `-i` option. 
-
-
-It's easier to describe the usage of `-i` option with an example. Suppose we have a configuration file `a.json`, and it contains:
-```json
-  "index" : [
-    {
-      "name" : "hnsw1",
-      ...
-    },
-    {
-      "name" : "hnsw1",
-      ...
-    },
-    {
-      "name" : "faiss",
-      ...
-    }
-  ]
+$ ./cpp/build/*_ANN_BENCH --help
+benchmark [--benchmark_list_tests={true|false}]
+          [--benchmark_filter=<regex>]
+          [--benchmark_min_time=`<integer>x` OR `<float>s` ]
+          [--benchmark_min_warmup_time=<min_warmup_time>]
+          [--benchmark_repetitions=<num_repetitions>]
+          [--benchmark_enable_random_interleaving={true|false}]
+          [--benchmark_report_aggregates_only={true|false}]
+          [--benchmark_display_aggregates_only={true|false}]
+          [--benchmark_format=<console|json|csv>]
+          [--benchmark_out=<filename>]
+          [--benchmark_out_format=<json|console|csv>]
+          [--benchmark_color={auto|true|false}]
+          [--benchmark_counters_tabular={true|false}]
+          [--benchmark_context=<key>=<value>,...]
+          [--benchmark_time_unit={ns|us|ms|s}]
+          [--v=<verbosity>]
+          [--build|--search]
+          [--overwrite]
+          [--data_prefix=<prefix>]
+          <conf>.json
+
+Note the non-standard benchmark parameters:
+  --build: build mode, will build index
+  --search: search mode, will search using the built index
+            one and only one of --build and --search should be specified
+  --overwrite: force overwriting existing index files
+  --data_prefix=<prefix>: prepend <prefix> to dataset file paths specified in the <conf>.json.
+  --override_kv=<key:value1:value2:...:valueN>: override a build/search key one or more times multiplying the number of configurations; you can use this parameter multiple times to get the Cartesian product of benchmark configs.
 ```
-Then,
-```bash
-# build all indices: hnsw1, hnsw2 and faiss
-./cpp/build/HNSWLIB_ANN_BENCH -b a.json
-
-# build only hnsw1
-./cpp/build/HNSWLIB_ANN_BENCH -b -i hnsw1 a.json
+* `--build`: build the index.
+* `--search`: do the searching with a built index.
+* `--overwrite`: by default, the build mode skips building an index if its file already exists. This is useful when adding more configurations to the config file; only the new indices are built, without the need to specify an elaborate filtering regex. Supplying the `--overwrite` flag disables this behavior; all indices are built regardless of whether they are already stored on disk.
+* `--data_prefix`: prepend an arbitrary path to the data file paths. By default, it is equal to `data`. Note that this does not apply to index file paths.
+* `--override_kv`: override a build/search key one or more times, multiplying the number of configurations.
 
-# build hnsw1 and hnsw2
-./cpp/build/HNSWLIB_ANN_BENCH -b -i hnsw1,hnsw2 a.json
-
-# build hnsw1 and hnsw2
-./cpp/build/HNSWLIB_ANN_BENCH -b -i 'hnsw*' a.json
-
-# build faiss
-./cpp/build/FAISS_IVF_FLAT_ANN_BENCH -b -i 'faiss' a.json
-```
-In the last two commands, we use wildcard "`*`" to match both `hnsw1` and `hnsw2`. Note the use of "`*`" is quite limited. It can occur only at the end of a pattern, so both "`*nsw1`" and "`h*sw1`" are interpreted literally and will not match anything. Also note that quotation marks must be used to prevent "`*`" from being interpreted by the shell.
+In addition to these ANN-specific flags, you can use all of the standard Google Benchmark flags. Some of the useful flags:
+* `--benchmark_filter`: specify a subset of benchmarks to run
+* `--benchmark_out`, `--benchmark_out_format`: store the output to a file
+* `--benchmark_list_tests`: check the available configurations
+* `--benchmark_min_time`: specify the minimum duration or number of iterations per case to improve the accuracy of the benchmarks.
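+
+For scripted runs, the same flags can be passed to the executable from Python, which is how the `raft-ann-bench` wrappers drive it. Below is a minimal sketch, assuming a built `RAFT_IVF_FLAT_ANN_BENCH` binary and the `glove-100-inner` configuration; the output file name is arbitrary:
+```python
+import subprocess
+
+# Search with an already-built index; each --override_kv overrides a build/search
+# key in the config (repeating the flag multiplies the benchmark configurations).
+subprocess.run(
+    [
+        "./cpp/build/RAFT_IVF_FLAT_ANN_BENCH",
+        "--search",
+        "--data_prefix=cpp/bench/ann/data",
+        "--benchmark_counters_tabular",
+        "--benchmark_out_format=json",
+        "--benchmark_out=ivf_flat_search.json",
+        "--override_kv=k:10",
+        "--override_kv=n_queries:10000",
+        "cpp/bench/ann/conf/glove-100-inner.json",
+    ],
+    check=True,
+)
+```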
+Refer to the google benchmark [user guide](https://github.com/google/benchmark/blob/main/docs/user_guide.md#command-line) for more information about the command-line usage. ##### Step 3: Searching -Use the `-s` flag on any of the `*_ANN_BENCH` executables. Other options are the same as in step 2. - - -##### Step 4: Evaluating Results -Use `cpp/bench/ann/scripts/eval.pl` to evaluate benchmark results. The usage is: -```bash -$ cpp/bench/ann/scripts/eval.pl -usage: [-f] [-o output.csv] groundtruth.neighbors.ibin result_paths... - result_paths... are paths to the search result files. - Can specify multiple paths. - For each of them, if it's a directory, all the .txt files found under - it recursively will be regarded as inputs. - - -f: force to recompute recall and update it in result file if needed - -o: also write result to a csv file +Use the `--search` flag on any of the `*_ANN_BENCH` executables. Other options are the same as in step 2. + +## Adding a new ANN algorithm +Implementation of a new algorithm should be a class that inherits `class ANN` (defined in `cpp/bench/ann/src/ann.h`) and implements all the pure virtual functions. + +In addition, it should define two `struct`s for building and searching parameters. The searching parameter class should inherit `struct ANN::AnnSearchParam`. Take `class HnswLib` as an example, its definition is: +```c++ +template +class HnswLib : public ANN { +public: + struct BuildParam { + int M; + int ef_construction; + int num_threads; + }; + + using typename ANN::AnnSearchParam; + struct SearchParam : public AnnSearchParam { + int ef; + int num_threads; + }; + + // ... +}; ``` -Note that there can be multiple arguments for paths of result files. Each argument can be either a file name or a path. If it's a directory, all files found under it recursively will be used as input files. -An example: -```bash -cpp/bench/ann/scripts/eval.pl groundtruth.neighbors.ibin \ - result/glove-100-angular/10/hnsw/angular_M_24_*.txt \ - result/glove-100-angular/10/faiss/ + +The benchmark program uses JSON configuration file. To add the new algorithm to the benchmark, need be able to specify `build_param`, whose value is a JSON object, and `search_params`, whose value is an array of JSON objects, for this algorithm in configuration file. Still take the configuration for `HnswLib` as an example: +```json +{ + "name" : "...", + "algo" : "hnswlib", + "build_param": {"M":12, "efConstruction":500, "numThreads":32}, + "file" : "/path/to/file", + "search_params" : [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1} + ] +}, ``` -The search result files used by this command are files matching `result/glove-100-angular/10/hnsw/angular_M_24_*.txt`, and all `.txt` files under directory `result/glove-100-angular/10/faiss/` recursively. -This script prints recall and QPS for every result file. Also, it outputs estimated "recall at QPS=2000" and "QPS at recall=0.9", which can be used to compare performance quantitatively. +How to interpret these JSON objects is totally left to the implementation and should be specified in `cpp/bench/ann/src/factory.cuh`: +1. 
First, add two functions for parsing JSON object to `struct BuildParam` and `struct SearchParam`, respectively: + ```c++ + template + void parse_build_param(const nlohmann::json& conf, + typename cuann::HnswLib::BuildParam& param) { + param.ef_construction = conf.at("efConstruction"); + param.M = conf.at("M"); + if (conf.contains("numThreads")) { + param.num_threads = conf.at("numThreads"); + } + } -It saves recall value in result txt file, so avoids to recompute recall if the same command is run again. To force to recompute recall, option `-f` can be used. If option `-o ` is specified, a csv output file will be produced. This file can be used to plot Throughput-Recall curves. + template + void parse_search_param(const nlohmann::json& conf, + typename cuann::HnswLib::SearchParam& param) { + param.ef = conf.at("ef"); + if (conf.contains("numThreads")) { + param.num_threads = conf.at("numThreads"); + } + } + ``` + +2. Next, add corresponding `if` case to functions `create_algo()` and `create_search_param()` by calling parsing functions. The string literal in `if` condition statement must be the same as the value of `algo` in configuration file. For example, + ```c++ + // JSON configuration file contains a line like: "algo" : "hnswlib" + if (algo == "hnswlib") { + // ... + } + ``` diff --git a/docs/source/ann_benchmarks_param_tuning.md b/docs/source/ann_benchmarks_param_tuning.md new file mode 100644 index 0000000000..1391f261cb --- /dev/null +++ b/docs/source/ann_benchmarks_param_tuning.md @@ -0,0 +1,13 @@ +# ANN Benchmarks Parameter Tuning Guide + +This guide outlines the various parameter settings that can be specified in [RAFT ANN Benchmark](raft_ann_benchmarks.md) json configuration files and explains the impact they have on corresponding algorithms to help inform their settings for benchmarking across desired levels of recall. + + +| Algorithm | Parameter Options | +|---------------------|----------------------------------------------| +| `faiss_gpu_ivf_flat` | `{ }` | `faiss_gpu_ivf_flat`, `faiss_gpu_ivf_pq` | +| GGNN | `ggnn` | +| HNSWlib | `hnswlib` | +| RAFT | `raft_cagra`, `raft_ivf_flat`, `raft_ivf_pq` | + + diff --git a/docs/source/raft_ann_benchmarks.md b/docs/source/raft_ann_benchmarks.md index 72e1dfa36a..f4af3c8787 100644 --- a/docs/source/raft_ann_benchmarks.md +++ b/docs/source/raft_ann_benchmarks.md @@ -219,7 +219,58 @@ The figure below is the resulting plot of running our benchmarks as of August 20 ![Throughput vs recall plot comparing popular ANN algorithms with RAFT's at batch size 10](../../img/raft-vector-search-batch-10.png) +## Creating and customizing dataset configurations + +A single configuration file will often define a set of algorithms, with associated index and search parameters, for a specific dataset. A configuration file uses json format with 4 major parts: +1. Dataset information +2. Algorithm information +3. Index parameters +4. 
Search parameters + +Below is a simple example configuration file for the 1M-scale `sift-128-euclidean` dataset: + +```json +{ + "dataset": { + "name": "sift-128-euclidean", + "base_file": "sift-128-euclidean/base.fbin", + "query_file": "sift-128-euclidean/query.fbin", + "subset_size": 1000000, + "groundtruth_neighbors_file": "sift-128-euclidean/groundtruth.neighbors.ibin", + "distance": "euclidean" + }, + "index": [] +} +``` + +The `index` section will contain a list of index objects, each of which will have the following form: +```json +{ + "name": "algo_name.unique_index_name", + "algo": "algo_name", + "file": "sift-128-euclidean/algo_name/param1_val1-param2_val2", + "build_param": { "param1": "val1", "param2": "val2" }, + "search_params": { "search_param1": "search_val1" } +} +``` + +The table below contains the possible settings for the `algo` field. Each unique algorithm will have its own set of `build_param` and `search_params` settings. The [ANN Algorithm Parameter Tuning Guide](ann_benchmarks_param_tuning.md) contains detailed instructions on choosing build and search parameters for each supported algorithm. + +| Library | Algorithms | +|-----------|----------------------------------------------| +| FAISS | `faiss_gpu_ivf_flat`, `faiss_gpu_ivf_pq` | +| GGNN | `ggnn` | +| HNSWlib | `hnswlib` | +| RAFT | `raft_cagra`, `raft_ivf_flat`, `raft_ivf_pq` | + + + + +By default, the index will be placed in `bench/ann/data//index/`. Using `sift-128-euclidean` for the dataset with the `algo` example above, the indexes would be placed in `bench/ann/data/sift-128-euclidean/index/algo_name/param1_val1-param2_val2`. + + ## Adding a new ANN algorithm + ### Implementation and Configuration Implementation of a new algorithm should be a C++ class that inherits `class ANN` (defined in `cpp/bench/ann/src/ann.h`) and implements all the pure virtual functions. @@ -244,10 +295,10 @@ public: }; ``` -The benchmark program uses JSON configuration file. To add the new algorithm to the benchmark, need be able to specify `build_param`, whose value is a JSON object, and `search_params`, whose value is an array of JSON objects, for this algorithm in configuration file. Still take the configuration for `HnswLib` as an example: +The benchmark program uses JSON format in a configuration file to specify indexes to build, along with the build and search parameters. To add the new algorithm to the benchmark, need be able to specify `build_param`, whose value is a JSON object, and `search_params`, whose value is an array of JSON objects, for this algorithm in configuration file. The `build_param` and `search_param` arguments will vary depending on the algorithm. Take the configuration for `HnswLib` as an example: ```json { - "name" : "...", + "name" : "hnswlib.M12.ef500.th32", "algo" : "hnswlib", "build_param": {"M":12, "efConstruction":500, "numThreads":32}, "file" : "/path/to/file", @@ -259,7 +310,6 @@ public: "search_result_file" : "/path/to/file" }, ``` - How to interpret these JSON objects is totally left to the implementation and should be specified in `cpp/bench/ann/src/factory.cuh`: 1. First, add two functions for parsing JSON object to `struct BuildParam` and `struct SearchParam`, respectively: ```c++ @@ -283,7 +333,7 @@ How to interpret these JSON objects is totally left to the implementation and sh } ``` -2. Next, add corresponding `if` case to functions `create_algo()` and `create_search_param()` by calling parsing functions. 
The string literal in `if` condition statement must be the same as the value of `algo` in configuration file. For example, +2. Next, add corresponding `if` case to functions `create_algo()` (in `bench/ann/) and `create_search_param()` by calling parsing functions. The string literal in `if` condition statement must be the same as the value of `algo` in configuration file. For example, ```c++ // JSON configuration file contains a line like: "algo" : "hnswlib" if (algo == "hnswlib") { diff --git a/python/raft-ann-bench/LICENSE b/python/raft-ann-bench/LICENSE new file mode 120000 index 0000000000..30cff7403d --- /dev/null +++ b/python/raft-ann-bench/LICENSE @@ -0,0 +1 @@ +../../LICENSE \ No newline at end of file diff --git a/python/raft-ann-bench/pyproject.toml b/python/raft-ann-bench/pyproject.toml new file mode 100644 index 0000000000..4c8cc94288 --- /dev/null +++ b/python/raft-ann-bench/pyproject.toml @@ -0,0 +1,57 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. + +[build-system] +build-backend = "setuptools.build_meta" +requires = [ + "setuptools", + "wheel", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. + +[project] +name = "raft-ann-bench" +version = "23.10.00" +description = "RAFT ANN benchmarks" +authors = [ + { name = "NVIDIA Corporation" }, +] +license = { text = "Apache 2.0" } +requires-python = ">=3.9" +dependencies = [ +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. +classifiers = [ + "Intended Audience :: Developers", + "Topic :: Database", + "Topic :: Scientific/Engineering", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", +] +dynamic = ["entry-points"] + +[project.urls] +Homepage = "https://github.com/rapidsai/raft" + +[tool.setuptools] +license-files = ["LICENSE"] + +[tool.isort] +line_length = 79 +multi_line_output = 3 +include_trailing_comma = true +force_grid_wrap = 0 +combine_as_imports = true +order_by_type = true +skip = [ + "thirdparty", + ".eggs", + ".git", + ".hg", + ".mypy_cache", + ".tox", + ".venv", + "_build", + "buck-out", + "build", + "dist", +] diff --git a/python/raft-ann-bench/raft-ann-bench/__init__.py b/python/raft-ann-bench/raft-ann-bench/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/python/raft-ann-bench/raft-ann-bench/data_export/__main__.py b/python/raft-ann-bench/raft-ann-bench/data_export/__main__.py new file mode 100644 index 0000000000..80e3dcad93 --- /dev/null +++ b/python/raft-ann-bench/raft-ann-bench/data_export/__main__.py @@ -0,0 +1,92 @@ +# +# Copyright (c) 2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
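+
+# This module flattens the JSON result files written by the `*_ANN_BENCH`
+# executables (via `--benchmark_out=<file>.json`) into a single "Algo,Recall,QPS"
+# CSV: for each entry of the "benchmarks" array it takes the benchmark name,
+# the "Recall" counter and the "items_per_second" rate.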
+ +import argparse +import json +import os +from pathlib import Path + + +def parse_filepaths(fs): + for p in fs: + if p.endswith(".json") and os.path.exists(p): + yield p + else: + for f in Path(p).rglob("*.json"): + yield f.as_posix() + + +def export_results( + output_filepath, recompute, groundtruth_filepath, result_filepaths +): + print(f"Writing output file to: {output_filepath}") + + parsed_filepaths = parse_filepaths(result_filepaths) + + with open(output_filepath, "w") as out: + out.write("Algo,Recall,QPS\n") + + for fp in parsed_filepaths: + with open(fp, "r") as f: + data = json.load(f) + for benchmark_case in data["benchmarks"]: + algo = benchmark_case["name"] + recall = benchmark_case["Recall"] + qps = benchmark_case["items_per_second"] + out.write(f"{algo},{recall},{qps}\n") + + +def main(): + call_path = os.getcwd() + if "RAPIDS_DATASET_ROOT_DIR" in os.environ: + default_dataset_path = os.getenv("RAPIDS_DATASET_ROOT_DIR") + else: + default_dataset_path = os.path.join(call_path, "datasets/") + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument( + "--output", help="Path to the CSV output file", required=True + ) + parser.add_argument( + "--recompute", action="store_true", help="Recompute metrics" + ) + parser.add_argument( + "--dataset", + help="Name of the dataset to export results for", + default="glove-100-inner", + ) + parser.add_argument( + "--dataset-path", + help="path to dataset folder", + default=default_dataset_path, + ) + + args, result_filepaths = parser.parse_known_args() + + # if nothing is provided + if len(result_filepaths) == 0: + raise ValueError("No filepaths to results were provided") + + groundtruth_filepath = os.path.join( + args.dataset_path, args.dataset, "groundtruth.neighbors.ibin" + ) + export_results( + args.output, args.recompute, groundtruth_filepath, result_filepaths + ) + + +if __name__ == "__main__": + main() diff --git a/bench/ann/get_dataset.py b/python/raft-ann-bench/raft-ann-bench/get_dataset/__main__.py similarity index 62% rename from bench/ann/get_dataset.py rename to python/raft-ann-bench/raft-ann-bench/get_dataset/__main__.py index a175384dc3..605146a84e 100644 --- a/bench/ann/get_dataset.py +++ b/python/raft-ann-bench/raft-ann-bench/get_dataset/__main__.py @@ -32,16 +32,13 @@ def download_dataset(url, path): def convert_hdf5_to_fbin(path, normalize): - ann_bench_scripts_dir = os.path.join(os.getenv("RAFT_HOME"), - "cpp/bench/ann/scripts") - ann_bench_scripts_path = os.path.join(ann_bench_scripts_dir, - "hdf5_to_fbin.py") + ann_bench_scripts_path = "hdf5_to_fbin.py" if normalize and "angular" in path: - p = subprocess.Popen(["python", ann_bench_scripts_path, "-n", - "%s" % path]) + p = subprocess.Popen( + ["python", ann_bench_scripts_path, "-n", "%s" % path] + ) else: - p = subprocess.Popen(["python", ann_bench_scripts_path, - "%s" % path]) + p = subprocess.Popen(["python", ann_bench_scripts_path, "%s" % path]) p.wait() @@ -53,10 +50,16 @@ def move(name, ann_bench_data_path): new_path = os.path.join(ann_bench_data_path, new_name) if not os.path.exists(new_path): os.mkdir(new_path) - for bin_name in ["base.fbin", "query.fbin", "groundtruth.neighbors.ibin", - "groundtruth.distances.fbin"]: - os.rename(f"{ann_bench_data_path}/{name}.{bin_name}", - f"{new_path}/{bin_name}") + for bin_name in [ + "base.fbin", + "query.fbin", + "groundtruth.neighbors.ibin", + "groundtruth.distances.fbin", + ]: + os.rename( + f"{ann_bench_data_path}/{name}.{bin_name}", + 
f"{new_path}/{bin_name}", + ) def download(name, normalize, ann_bench_data_path): @@ -74,16 +77,27 @@ def download(name, normalize, ann_bench_data_path): def main(): + call_path = os.getcwd() + if "RAPIDS_DATASET_ROOT_DIR" in os.environ: + default_dataset_path = os.getenv("RAPIDS_DATASET_ROOT_DIR") + else: + default_dataset_path = os.path.join(call_path, "datasets/") parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument("--dataset", help="dataset to download", - default="glove-100-angular") - parser.add_argument("--dataset-path", help="path to download dataset", - default=os.path.join(os.getenv("RAFT_HOME"), - "bench", "ann", "data")) - parser.add_argument("--normalize", - help="normalize cosine distance to inner product", - action="store_true") + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument( + "--dataset", help="dataset to download", default="glove-100-angular" + ) + parser.add_argument( + "--dataset-path", + help="path to download dataset", + default=default_dataset_path, + ) + parser.add_argument( + "--normalize", + help="normalize cosine distance to inner product", + action="store_true", + ) args = parser.parse_args() download(args.dataset, args.normalize, args.dataset_path) diff --git a/cpp/bench/ann/scripts/fbin_to_f16bin.py b/python/raft-ann-bench/raft-ann-bench/get_dataset/fbin_to_f16bin.py similarity index 57% rename from cpp/bench/ann/scripts/fbin_to_f16bin.py rename to python/raft-ann-bench/raft-ann-bench/get_dataset/fbin_to_f16bin.py index 4ea8988d87..ee7410e0cc 100755 --- a/cpp/bench/ann/scripts/fbin_to_f16bin.py +++ b/python/raft-ann-bench/raft-ann-bench/get_dataset/fbin_to_f16bin.py @@ -1,20 +1,23 @@ -# ============================================================================= -# Copyright (c) 2020-2023, NVIDIA CORPORATION. # -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at +# Copyright (c) 2023, NVIDIA CORPORATION. # -# http://www.apache.org/licenses/LICENSE-2.0 +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
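+
+# Convert a dataset file of float32 vectors (*.fbin) into float16 (*.f16bin);
+# see docs/source/ann_benchmarks_low_level.md for the dataset file formats.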
+ + +from __future__ import absolute_import, division, print_function + import sys + import numpy as np diff --git a/cpp/bench/ann/scripts/hdf5_to_fbin.py b/python/raft-ann-bench/raft-ann-bench/get_dataset/hdf5_to_fbin.py similarity index 78% rename from cpp/bench/ann/scripts/hdf5_to_fbin.py rename to python/raft-ann-bench/raft-ann-bench/get_dataset/hdf5_to_fbin.py index cfeb184ea8..ba853c63f5 100755 --- a/cpp/bench/ann/scripts/hdf5_to_fbin.py +++ b/python/raft-ann-bench/raft-ann-bench/get_dataset/hdf5_to_fbin.py @@ -1,19 +1,23 @@ -# ============================================================================= -# Copyright (c) 2020-2023, NVIDIA CORPORATION. # -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at +# Copyright (c) 2023, NVIDIA CORPORATION. # -# http://www.apache.org/licenses/LICENSE-2.0 +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at # -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import sys -import numpy as np + import h5py +import numpy as np def normalize(x): @@ -65,7 +69,8 @@ def write_bin(fname, data): query = normalize(query) elif hdf5.attrs["distance"] == "angular": print( - "warning: input has angular distance, specify -n to normalize base/query set!\n" + "warning: input has angular distance, ", + "specify -n to normalize base/query set!\n", ) output_fname = fname_prefix + ".base.fbin" diff --git a/bench/ann/plot.py b/python/raft-ann-bench/raft-ann-bench/plot/__main__.py similarity index 70% rename from bench/ann/plot.py rename to python/raft-ann-bench/raft-ann-bench/plot/__main__.py index 0020e398a9..5f81019d8c 100644 --- a/bench/ann/plot.py +++ b/python/raft-ann-bench/raft-ann-bench/plot/__main__.py @@ -13,22 +13,21 @@ # See the License for the specific language governing permissions and # limitations under the License. 
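+
+# Plot a throughput/recall (QPS vs Recall) curve from the "Algo,Recall,QPS" CSV
+# produced by the data_export module; by default only the Pareto frontier of
+# each algorithm is drawn, while --raw also shows individual runs in faded colours.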
-# This script is inspired by +# This script is inspired by # 1: https://github.com/erikbern/ann-benchmarks/blob/main/plot.py -# 2: https://github.com/erikbern/ann-benchmarks/blob/main/ann_benchmarks/plotting/utils.py -# 3: https://github.com/erikbern/ann-benchmarks/blob/main/ann_benchmarks/plotting/metrics.py +# 2: https://github.com/erikbern/ann-benchmarks/blob/main/ann_benchmarks/plotting/utils.py # noqa: E501 +# 3: https://github.com/erikbern/ann-benchmarks/blob/main/ann_benchmarks/plotting/metrics.py # noqa: E501 # Licence: https://github.com/erikbern/ann-benchmarks/blob/main/LICENSE -import matplotlib as mpl - -mpl.use("Agg") # noqa import argparse import itertools -import matplotlib.pyplot as plt -import numpy as np import os +import matplotlib as mpl +import matplotlib.pyplot as plt +import numpy as np +mpl.use("Agg") metrics = { "k-nn": { @@ -39,7 +38,7 @@ "qps": { "description": "Queries per second (1/s)", "worst": float("-inf"), - } + }, } @@ -51,17 +50,36 @@ def euclidean(a, b): return sum((x - y) ** 2 for x, y in zip(a, b)) while len(colors) < n: - new_color = max(itertools.product(vs, vs, vs), key=lambda a: min(euclidean(a, b) for b in colors)) + new_color = max( + itertools.product(vs, vs, vs), + key=lambda a: min(euclidean(a, b) for b in colors), + ) colors.append(new_color + (1.0,)) return colors def create_linestyles(unique_algorithms): - colors = dict(zip(unique_algorithms, generate_n_colors(len(unique_algorithms)))) - linestyles = dict((algo, ["--", "-.", "-", ":"][i % 4]) for i, algo in enumerate(unique_algorithms)) - markerstyles = dict((algo, ["+", "<", "o", "*", "x"][i % 5]) for i, algo in enumerate(unique_algorithms)) - faded = dict((algo, (r, g, b, 0.3)) for algo, (r, g, b, a) in colors.items()) - return dict((algo, (colors[algo], faded[algo], linestyles[algo], markerstyles[algo])) for algo in unique_algorithms) + colors = dict( + zip(unique_algorithms, generate_n_colors(len(unique_algorithms))) + ) + linestyles = dict( + (algo, ["--", "-.", "-", ":"][i % 4]) + for i, algo in enumerate(unique_algorithms) + ) + markerstyles = dict( + (algo, ["+", "<", "o", "*", "x"][i % 5]) + for i, algo in enumerate(unique_algorithms) + ) + faded = dict( + (algo, (r, g, b, 0.3)) for algo, (r, g, b, a) in colors.items() + ) + return dict( + ( + algo, + (colors[algo], faded[algo], linestyles[algo], markerstyles[algo]), + ) + for algo in unique_algorithms + ) def get_up_down(metric): @@ -77,7 +95,10 @@ def get_left_right(metric): def get_plot_label(xm, ym): - template = "%(xlabel)s-%(ylabel)s tradeoff - %(updown)s and" " to the %(leftright)s is better" + template = ( + "%(xlabel)s-%(ylabel)s tradeoff - %(updown)s and" + " to the %(leftright)s is better" + ) return template % { "xlabel": xm["description"], "ylabel": ym["description"], @@ -96,7 +117,9 @@ def create_pointset(data, xn, yn): # Generate Pareto frontier xs, ys, ls = [], [], [] last_x = xm["worst"] - comparator = (lambda xv, lx: xv > lx) if last_x < 0 else (lambda xv, lx: xv < lx) + comparator = ( + (lambda xv, lx: xv > lx) if last_x < 0 else (lambda xv, lx: xv < lx) + ) for algo_name, xv, yv in data: if not xv or not yv: continue @@ -133,12 +156,28 @@ def mean_y(algo): max_x = max([max_x] + [x for x in xs if x < 1]) color, faded, linestyle, marker = linestyles[algo] (handle,) = plt.plot( - xs, ys, "-", label=algo, color=color, ms=7, mew=3, lw=3, marker=marker + xs, + ys, + "-", + label=algo, + color=color, + ms=7, + mew=3, + lw=3, + marker=marker, ) handles.append(handle) if 
raw: (handle2,) = plt.plot( - axs, ays, "-", label=algo, color=faded, ms=5, mew=2, lw=2, marker=marker + axs, + ays, + "-", + label=algo, + color=faded, + ms=5, + mew=2, + lw=2, + marker=marker, ) labels.append(algo) @@ -172,7 +211,13 @@ def inv_fun(x): ax.set_title(get_plot_label(xm, ym)) plt.gca().get_position() # plt.gca().set_position([box.x0, box.y0, box.width * 0.8, box.height]) - ax.legend(handles, labels, loc="center left", bbox_to_anchor=(1, 0.5), prop={"size": 9}) + ax.legend( + handles, + labels, + loc="center left", + bbox_to_anchor=(1, 0.5), + prop={"size": 9}, + ) plt.grid(visible=True, which="major", color="0.65", linestyle="-") plt.setp(ax.get_xminorticklabels(), visible=True) @@ -194,28 +239,35 @@ def inv_fun(x): def load_all_results(result_filepath): results = dict() - with open(result_filepath, 'r') as f: + with open(result_filepath, "r") as f: for line in f.readlines()[1:]: - split_lines = line.split(',') - algo_name = split_lines[0].split('.')[0] + split_lines = line.split(",") + algo_name = split_lines[0].split(".")[0] if algo_name not in results: results[algo_name] = [] - results[algo_name].append([algo_name, float(split_lines[1]), - float(split_lines[2])]) + results[algo_name].append( + [algo_name, float(split_lines[1]), float(split_lines[2])] + ) return results def main(): parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument("--result-csv", help="Path to CSV Results", required=True) - parser.add_argument("--output", help="Path to the PNG output file", - default=f"{os.getcwd()}/out.png") + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument( + "--result-csv", help="Path to CSV Results", required=True + ) + parser.add_argument( + "--output", + help="Path to the PNG output file", + default=f"{os.getcwd()}/out.png", + ) parser.add_argument( "--x-scale", help="Scale to use when drawing the X-axis. 
\ - Typically linear, logit or a2", - default="linear" + Typically linear, logit or a2", + default="linear", ) parser.add_argument( "--y-scale", @@ -224,7 +276,9 @@ def main(): default="linear", ) parser.add_argument( - "--raw", help="Show raw results (not just Pareto frontier) in faded colours", action="store_true" + "--raw", + help="Show raw results (not just Pareto frontier) in faded colours", + action="store_true", ) args = parser.parse_args() @@ -233,7 +287,9 @@ def main(): results = load_all_results(args.result_csv) linestyles = create_linestyles(sorted(results.keys())) - create_plot(results, args.raw, args.x_scale, args.y_scale, args.output, linestyles) + create_plot( + results, args.raw, args.x_scale, args.y_scale, args.output, linestyles + ) if __name__ == "__main__": diff --git a/bench/ann/run.py b/python/raft-ann-bench/raft-ann-bench/run/__main__.py similarity index 55% rename from bench/ann/run.py rename to python/raft-ann-bench/raft-ann-bench/run/__main__.py index d8e33f1113..917e2e76da 100644 --- a/bench/ann/run.py +++ b/python/raft-ann-bench/raft-ann-bench/run/__main__.py @@ -17,9 +17,23 @@ import json import os import subprocess + import yaml +def positive_int(input_str: str) -> int: + try: + i = int(input_str) + if i < 1: + raise ValueError + except ValueError: + raise argparse.ArgumentTypeError( + f"{input_str} is not a positive integer" + ) + + return i + + def validate_algorithm(algos_conf, algo): algos_conf_keys = set(algos_conf.keys()) return algo in algos_conf_keys and not algos_conf[algo]["disabled"] @@ -27,19 +41,33 @@ def validate_algorithm(algos_conf, algo): def find_executable(algos_conf, algo): executable = algos_conf[algo]["executable"] - conda_path = os.path.join(os.getenv("CONDA_PREFIX"), "bin", "ann", - executable) - build_path = os.path.join(os.getenv("RAFT_HOME"), "cpp", "build", executable) + conda_path = os.path.join( + os.getenv("CONDA_PREFIX"), "bin", "ann", executable + ) + build_path = os.path.join( + os.getenv("RAFT_HOME"), "cpp", "build", executable + ) if os.path.exists(conda_path): + print("Using RAFT bench found in conda environment: ") return (executable, conda_path) elif os.path.exists(build_path): + print(f"Using RAFT bench from repository specified in {build_path}: ") return (executable, build_path) else: raise FileNotFoundError(executable) -def run_build_and_search(conf_filename, conf_file, executables_to_run, - force, conf_filedir, build, search): +def run_build_and_search( + conf_filename, + conf_file, + executables_to_run, + force, + conf_filedir, + build, + search, + k, + batch_size, +): for executable, ann_executable_path in executables_to_run.keys(): # Need to write temporary configuration temp_conf_filename = f"temporary_executable_{conf_filename}" @@ -48,41 +76,84 @@ def run_build_and_search(conf_filename, conf_file, executables_to_run, temp_conf = dict() temp_conf["dataset"] = conf_file["dataset"] temp_conf["search_basic_param"] = conf_file["search_basic_param"] - temp_conf["index"] = executables_to_run[(executable, - ann_executable_path)]["index"] + temp_conf["index"] = executables_to_run[ + (executable, ann_executable_path) + ]["index"] json.dump(temp_conf, f) if build: if force: - p = subprocess.Popen([ann_executable_path, "-b", "-f", - temp_conf_filepath]) + p = subprocess.Popen( + [ + ann_executable_path, + "--build", + "--overwrite", + temp_conf_filepath, + ] + ) p.wait() else: - p = subprocess.Popen([ann_executable_path, "-b", - temp_conf_filepath]) + p = subprocess.Popen( + [ann_executable_path, "--build", 
temp_conf_filepath] + ) p.wait() if search: - if force: - p = subprocess.Popen([ann_executable_path, "-s", "-f", - temp_conf_filepath]) - p.wait() - else: - p = subprocess.Popen([ann_executable_path, "-s", - temp_conf_filepath]) - p.wait() + legacy_result_folder = "result/" + temp_conf["dataset"]["name"] + os.makedirs(legacy_result_folder, exist_ok=True) + p = subprocess.Popen( + [ + ann_executable_path, + "--search", + "--benchmark_counters_tabular", + "--benchmark_out_format=json", + "--override_kv=k:%s" % k, + "--override_kv=n_queries:%s" % batch_size, + f"--benchmark_out={legacy_result_folder}/{executable}.json", # noqa: E501 + temp_conf_filepath, + ] + ) + p.wait() os.remove(temp_conf_filepath) def main(): scripts_path = os.path.dirname(os.path.realpath(__file__)) + call_path = os.getcwd() # Read list of allowed algorithms - with open(f"{scripts_path}/algos.yaml", "r") as f: + try: + import pylibraft # noqa: F401 + + algo_file = "algos.yaml" + except ImportError: + algo_file = "algos_cpu.yaml" + with open(f"{scripts_path}/{algo_file}", "r") as f: algos_conf = yaml.safe_load(f) + if "RAPIDS_DATASET_ROOT_DIR" in os.environ: + default_dataset_path = os.getenv("RAPIDS_DATASET_ROOT_DIR") + else: + default_dataset_path = os.path.join(call_path, "datasets/") + parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter) + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + + parser.add_argument( + "-k", + "--count", + default=10, + type=positive_int, + help="the number of nearest neighbors to search for", + ) + parser.add_argument( + "-bs", + "--batch-size", + default=10000, + type=positive_int, + help="number of query vectors to use in each query trial", + ) parser.add_argument( "--configuration", help="path to configuration file for a dataset", @@ -90,34 +161,34 @@ def main(): parser.add_argument( "--dataset", help="dataset whose configuration file will be used", - default="glove-100-inner" + default="glove-100-inner", ) parser.add_argument( "--dataset-path", help="path to dataset folder", - default=os.path.join(os.getenv("RAFT_HOME"), - "bench", "ann", "data") + default=default_dataset_path, ) + parser.add_argument("--build", action="store_true") + parser.add_argument("--search", action="store_true") parser.add_argument( - "--build", - action="store_true" + "--algorithms", + help="run only comma separated list of named \ + algorithms", + default=None, ) parser.add_argument( - "--search", - action="store_true" - ) - parser.add_argument("--algorithms", - help="run only comma separated list of named \ - algorithms", - default=None) - parser.add_argument("--indices", - help="run only comma separated list of named indices. \ + "--indices", + help="run only comma separated list of named indices. 
\ parameter `algorithms` is ignored", - default=None) - parser.add_argument("-f", "--force", - help="re-run algorithms even if their results \ + default=None, + ) + parser.add_argument( + "-f", + "--force", + help="re-run algorithms even if their results \ already exist", - action="store_true") + action="store_true", + ) args = parser.parse_args() @@ -130,15 +201,22 @@ def main(): build = args.build search = args.search + k = args.count + batch_size = args.batch_size + # Read configuration file associated to dataset if args.configuration: conf_filepath = args.configuration else: - conf_filepath = os.path.join(scripts_path, "conf", f"{args.dataset}.json") + conf_filepath = os.path.join( + scripts_path, "conf", f"{args.dataset}.json" + ) conf_filename = conf_filepath.split("/")[-1] conf_filedir = "/".join(conf_filepath.split("/")[:-1]) dataset_name = conf_filename.replace(".json", "") - dataset_path = os.path.join(args.dataset_path, dataset_name) + dataset_path = os.path.realpath( + os.path.join(args.dataset_path, dataset_name) + ) if not os.path.exists(conf_filepath): raise FileNotFoundError(conf_filename) @@ -146,14 +224,13 @@ def main(): conf_file = json.load(f) # Replace base, query to dataset-path - replacement_base_filepath = \ - os.path.normpath(conf_file["dataset"]["base_file"]).split(os.path.sep)[-1] - conf_file["dataset"]["base_file"] = \ - os.path.join(dataset_path, replacement_base_filepath) - replacement_query_filepath = \ - os.path.normpath(conf_file["dataset"]["query_file"]).split(os.path.sep)[-1] - conf_file["dataset"]["query_file"] = \ - os.path.join(dataset_path, replacement_query_filepath) + conf_file["dataset"]["base_file"] = os.path.join(dataset_path, "base.fbin") + conf_file["dataset"]["query_file"] = os.path.join( + dataset_path, "query.fbin" + ) + conf_file["dataset"]["groundtruth_neighbors_file"] = os.path.join( + dataset_path, "groundtruth.neighbors.ibin" + ) # Ensure base and query files exist for dataset if not os.path.exists(conf_file["dataset"]["base_file"]): raise FileNotFoundError(conf_file["dataset"]["base_file"]) @@ -168,8 +245,9 @@ def main(): # and enabled for index in conf_file["index"]: curr_algo = index["algo"] - if index["name"] in indices and \ - validate_algorithm(algos_conf, curr_algo): + if index["name"] in indices and validate_algorithm( + algos_conf, curr_algo + ): executable_path = find_executable(algos_conf, curr_algo) if executable_path not in executables_to_run: executables_to_run[executable_path] = {"index": []} @@ -182,8 +260,9 @@ def main(): # and are enabled in algos.yaml for index in conf_file["index"]: curr_algo = index["algo"] - if curr_algo in algorithms and \ - validate_algorithm(algos_conf, curr_algo): + if curr_algo in algorithms and validate_algorithm( + algos_conf, curr_algo + ): executable_path = find_executable(algos_conf, curr_algo) if executable_path not in executables_to_run: executables_to_run[executable_path] = {"index": []} @@ -201,14 +280,26 @@ def main(): # Replace build, search to dataset path for executable_path in executables_to_run: - for pos, index in enumerate(executables_to_run[executable_path]["index"]): + for pos, index in enumerate( + executables_to_run[executable_path]["index"] + ): index["file"] = os.path.join(dataset_path, "index", index["name"]) - index["search_result_file"] = \ - os.path.join(dataset_path, "result", index["name"]) + index["search_result_file"] = os.path.join( + dataset_path, "result", index["name"] + ) executables_to_run[executable_path]["index"][pos] = index - 
run_build_and_search(conf_filename, conf_file, executables_to_run, - args.force, conf_filedir, build, search) + run_build_and_search( + conf_filename, + conf_file, + executables_to_run, + args.force, + conf_filedir, + build, + search, + k, + batch_size, + ) if __name__ == "__main__": diff --git a/python/raft-ann-bench/raft-ann-bench/run/algos-cpu.yaml b/python/raft-ann-bench/raft-ann-bench/run/algos-cpu.yaml new file mode 100644 index 0000000000..cb63d0920c --- /dev/null +++ b/python/raft-ann-bench/raft-ann-bench/run/algos-cpu.yaml @@ -0,0 +1,30 @@ +faiss_gpu_ivf_flat: + executable: FAISS_IVF_FLAT_ANN_BENCH + disabled: true +faiss_gpu_flat: + executable: FAISS_IVF_FLAT_ANN_BENCH + disabled: true +faiss_gpu_ivf_pq: + executable: FAISS_IVF_PQ_ANN_BENCH + disabled: true +faiss_gpu_ivf_sq: + executable: FAISS_IVF_PQ_ANN_BENCH + disabled: true +faiss_gpu_bfknn: + executable: FAISS_BFKNN_ANN_BENCH + disabled: true +raft_ivf_flat: + executable: RAFT_IVF_FLAT_ANN_BENCH + disabled: true +raft_ivf_pq: + executable: RAFT_IVF_PQ_ANN_BENCH + disabled: true +raft_cagra: + executable: RAFT_CAGRA_ANN_BENCH + disabled: true +ggnn: + executable: GGNN_ANN_BENCH + disabled: true +hnswlib: + executable: HNSWLIB_ANN_BENCH + disabled: false diff --git a/bench/ann/algos.yaml b/python/raft-ann-bench/raft-ann-bench/run/algos.yaml similarity index 100% rename from bench/ann/algos.yaml rename to python/raft-ann-bench/raft-ann-bench/run/algos.yaml diff --git a/python/raft-ann-bench/raft-ann-bench/run/conf/bigann-100M.json b/python/raft-ann-bench/raft-ann-bench/run/conf/bigann-100M.json new file mode 100644 index 0000000000..c691c68299 --- /dev/null +++ b/python/raft-ann-bench/raft-ann-bench/run/conf/bigann-100M.json @@ -0,0 +1,196 @@ +{ + "dataset": { + "name": "bigann-100M", + "base_file": "bigann-1B/base.1B.u8bin", + "subset_size": 100000000, + "query_file": "bigann-1B/query.public.10K.u8bin", + "groundtruth_neighbors_file": "bigann-100M/groundtruth.neighbors.ibin", + "distance": "euclidean" + }, + + "search_basic_param": { + "batch_size": 10000, + "k": 10 + }, + + "index": [ + { + "name": "raft_ivf_pq.dimpq64-cluster5K", + "algo": "raft_ivf_pq", + "build_param": {"niter": 25, "nlist": 5000, "pq_dim": 64, "ratio": 10}, + "file": "bigann-100M/raft_ivf_pq/dimpq64-cluster5K", + "dataset_memtype": "host", + "search_params": [ + { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 1000, 
"internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 1000, "internalDistanceDtype": "half", "smemLutDtype": "half" } + ] + }, + { + "name": "raft_ivf_pq.dimpq64-cluster10K", + "algo": "raft_ivf_pq", + "build_param": {"niter": 25, "nlist": 10000, "pq_dim": 64, "ratio": 10}, + "file": "bigann-100M/raft_ivf_pq/dimpq64-cluster5K", + "search_params": [ + { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 1000, "internalDistanceDtype": "half", "smemLutDtype": "half" } + ] + }, + { + "name": "hnswlib.M12", + "algo": "hnswlib", + "build_param": {"M":12, "efConstruction":500, "numThreads":32}, + "file": "bigann-100M/hnswlib/M12", + "search_params": [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ] + }, + { + "name": "hnswlib.M16", + "algo": "hnswlib", + "build_param": {"M":16, "efConstruction":500, "numThreads":32}, + "file": "bigann-100M/hnswlib/M16", + "search_params": [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, 
"numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ] + }, + { + "name": "hnswlib.M24", + "algo": "hnswlib", + "build_param": {"M":24, "efConstruction":500, "numThreads":32}, + "file": "bigann-100M/hnswlib/M24", + "search_params": [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ] + }, + { + "name": "hnswlib.M36", + "algo": "hnswlib", + "build_param": {"M":36, "efConstruction":500, "numThreads":32}, + "file": "bigann-100M/hnswlib/M36", + "search_params": [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ] + }, + { + "name": "raft_ivf_flat.nlist100K", + "algo": "raft_ivf_flat", + "build_param": {"nlist": 100000, "niter": 25, "ratio": 5}, + "dataset_memtype":"host", + "file": "bigann-100M/raft_ivf_flat/nlist100K", + "search_params": [ + {"max_batch":10000, "max_k":10, "nprobe":20}, + {"max_batch":10000, "max_k":10, "nprobe":30}, + {"max_batch":10000, "max_k":10, "nprobe":40}, + {"max_batch":10000, "max_k":10, "nprobe":50}, + {"max_batch":10000, "max_k":10, "nprobe":100}, + {"max_batch":10000, "max_k":10, "nprobe":200}, + {"max_batch":10000, "max_k":10, "nprobe":500}, + {"max_batch":10000, "max_k":10, "nprobe":1000} + ] + }, + { + "name": "raft_cagra.dim32", + "algo": "raft_cagra", + "dataset_memtype": "host", + "build_param": {"index_dim": 32}, + "file": "bigann-100M/raft_cagra/dim32", + "search_params": [ + {"itopk": 32}, + {"itopk": 64}, + {"itopk": 128} + ] + }, + { + "name": "raft_cagra.dim64", + "algo": "raft_cagra", + "dataset_memtype":"host", + "build_param": {"index_dim": 64}, + "file": "bigann-100M/raft_cagra/dim64", + "search_params": [ + {"itopk": 32}, + {"itopk": 64}, + {"itopk": 128} + ] + } + ] +} diff --git a/python/raft-ann-bench/raft-ann-bench/run/conf/deep-100M.json b/python/raft-ann-bench/raft-ann-bench/run/conf/deep-100M.json new file mode 100644 index 0000000000..6591957961 --- /dev/null +++ b/python/raft-ann-bench/raft-ann-bench/run/conf/deep-100M.json @@ -0,0 +1,286 @@ +{ + "dataset": { + "name": "deep-100M", + "base_file": "data/deep-1B/base.1B.fbin", + "subset_size": 100000000, + "query_file": "data/deep-1B/query.public.10K.fbin", + "groundtruth_neighbors_file": "deep-100M/groundtruth.neighbors.ibin", + "distance": "euclidean" + }, + + "search_basic_param": { + "batch_size": 10000, + "k": 10 + }, + + "index": [ + { + "name": "hnswlib.M12", + "algo": "hnswlib", + "build_param": {"M":12, "efConstruction":500, "numThreads":32}, + "file": "deep-100M/hnswlib/M12", + "search_params": [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ] + }, + { + "name": "hnswlib.M16", + "algo": "hnswlib", + "build_param": {"M":16, "efConstruction":500, "numThreads":32}, + "file": "deep-100M/hnswlib/M16", + 
"search_params": [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ] + }, + { + "name": "hnswlib.M24", + "algo": "hnswlib", + "build_param": {"M":24, "efConstruction":500, "numThreads":32}, + "file": "deep-100M/hnswlib/M24", + "search_params": [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ] + }, + { + "name": "hnswlib.M36", + "algo": "hnswlib", + "build_param": {"M":36, "efConstruction":500, "numThreads":32}, + "file": "deep-100M/hnswlib/M36", + "search_params": [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ] + }, + { + "name": "faiss_ivf_flat.nlist50K", + "algo": "faiss_gpu_ivf_flat", + "build_param": {"nlist":50000}, + "file": "deep-100M/faiss_ivf_flat/nlist50K", + "search_params": [ + {"nprobe":20}, + {"nprobe":30}, + {"nprobe":40}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ] + }, + { + "name": "faiss_ivf_flat.nlist100K", + "algo": "faiss_gpu_ivf_flat", + "build_param": {"nlist":100000}, + "file": "deep-100M/faiss_ivf_flat/nlist100K", + "search_params": [ + {"nprobe":20}, + {"nprobe":30}, + {"nprobe":40}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ] + }, + { + "name": "faiss_ivf_flat.nlist200K", + "algo": "faiss_gpu_ivf_flat", + "build_param": {"nlist":200000}, + "file": "deep-100M/faiss_ivf_flat/nlist200K", + "search_params": [ + {"nprobe":20}, + {"nprobe":30}, + {"nprobe":40}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ] + }, + { + "name": "faiss_ivf_pq.M48-nlist16K", + "algo": "faiss_gpu_ivf_pq", + "build_param": {"nlist":16384, "M":48}, + "file": "deep-100M/faiss_ivf_pq/M48-nlist16K", + "search_params": [ + {"nprobe":10}, + {"nprobe":20}, + {"nprobe":30}, + {"nprobe":40}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500} + ] + }, + { + "name": "faiss_ivf_pq.M48-nlist50K", + "algo": "faiss_gpu_ivf_pq", + "build_param": {"nlist":50000, "M":48}, + "file": "deep-100M/faiss_ivf_pq/M48-nlist50K", + "search_params": [ + {"nprobe":20}, + {"nprobe":30}, + {"nprobe":40}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ] + }, + { + "name": "faiss_ivf_pq.M48-nlist100K", + "algo": "faiss_gpu_ivf_pq", + "build_param": {"nlist":100000, "M":48}, + "file": "deep-100M/faiss_ivf_pq/M48-nlist100K", + "search_params": [ + {"nprobe":20}, + {"nprobe":30}, + {"nprobe":40}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000} + ] + }, + { + "name": "raft_ivf_flat.nlist100K", + "algo": "raft_ivf_flat", + "dataset_memtype":"host", + "build_param": {"nlist": 100000, "niter": 25, "ratio": 5}, + "file": "deep-100M/raft_ivf_flat/nlist100K", + "search_params": [ + {"max_batch":10000, "max_k":10, "nprobe":20}, + {"max_batch":10000, 
"max_k":10, "nprobe":30}, + {"max_batch":10000, "max_k":10, "nprobe":40}, + {"max_batch":10000, "max_k":10, "nprobe":50}, + {"max_batch":10000, "max_k":10, "nprobe":100}, + {"max_batch":10000, "max_k":10, "nprobe":200}, + {"max_batch":10000, "max_k":10, "nprobe":500}, + {"max_batch":10000, "max_k":10, "nprobe":1000} + ] + }, + { + "name": "raft_cagra.dim32", + "algo": "raft_cagra", + "dataset_memtype":"host", + "build_param": {"index_dim": 32, "intermediate_graph_degree": 48}, + "file": "deep-100M/raft_cagra/dim32", + "search_params": [ + {"itopk": 32, "search_width": 1, "max_iterations": 0, "algo": "single_cta"}, + {"itopk": 32, "search_width": 1, "max_iterations": 32, "algo": "single_cta"}, + {"itopk": 64, "search_width": 4, "max_iterations": 16, "algo": "single_cta"}, + {"itopk": 64, "search_width": 1, "max_iterations": 64, "algo": "single_cta"}, + {"itopk": 96, "search_width": 2, "max_iterations": 48, "algo": "single_cta"}, + {"itopk": 128, "search_width": 8, "max_iterations": 16, "algo": "single_cta"}, + {"itopk": 128, "search_width": 2, "max_iterations": 64, "algo": "single_cta"}, + {"itopk": 192, "search_width": 8, "max_iterations": 24, "algo": "single_cta"}, + {"itopk": 192, "search_width": 2, "max_iterations": 96, "algo": "single_cta"}, + {"itopk": 256, "search_width": 8, "max_iterations": 32, "algo": "single_cta"}, + {"itopk": 384, "search_width": 8, "max_iterations": 48, "algo": "single_cta"}, + {"itopk": 512, "search_width": 8, "max_iterations": 64, "algo": "single_cta"} + ] + }, + { + "name": "raft_cagra.dim32.multi_cta", + "algo": "raft_cagra", + "dataset_memtype":"host", + "build_param": {"index_dim": 32, "intermediate_graph_degree": 48}, + "file": "deep-100M/raft_cagra/dim32", + "search_params": [ + {"itopk": 32, "search_width": 1, "max_iterations": 0, "algo": "multi_cta"}, + {"itopk": 32, "search_width": 1, "max_iterations": 32, "algo": "multi_cta"}, + {"itopk": 64, "search_width": 4, "max_iterations": 16, "algo": "multi_cta"}, + {"itopk": 64, "search_width": 1, "max_iterations": 64, "algo": "multi_cta"}, + {"itopk": 96, "search_width": 2, "max_iterations": 48, "algo": "multi_cta"}, + {"itopk": 128, "search_width": 8, "max_iterations": 16, "algo": "multi_cta"}, + {"itopk": 128, "search_width": 2, "max_iterations": 64, "algo": "multi_cta"}, + {"itopk": 192, "search_width": 8, "max_iterations": 24, "algo": "multi_cta"}, + {"itopk": 192, "search_width": 2, "max_iterations": 96, "algo": "multi_cta"}, + {"itopk": 256, "search_width": 8, "max_iterations": 32, "algo": "multi_cta"}, + {"itopk": 384, "search_width": 8, "max_iterations": 48, "algo": "multi_cta"}, + {"itopk": 512, "search_width": 8, "max_iterations": 64, "algo": "multi_cta"} + ] + { + "name": "raft_cagra.dim32.multi_kernel", + "algo": "raft_cagra", + "dataset_memtype":"host", + "build_param": {"index_dim": 32, "intermediate_graph_degree": 48}, + "file": "deep-100M/raft_cagra/dim32", + "search_params": [ + {"itopk": 32, "search_width": 1, "max_iterations": 0, "algo": "multi_kernel"}, + {"itopk": 32, "search_width": 1, "max_iterations": 32, "algo": "multi_kernel"}, + {"itopk": 64, "search_width": 4, "max_iterations": 16, "algo": "multi_kernel"}, + {"itopk": 64, "search_width": 1, "max_iterations": 64, "algo": "multi_kernel"}, + {"itopk": 96, "search_width": 2, "max_iterations": 48, "algo": "multi_kernel"}, + {"itopk": 128, "search_width": 8, "max_iterations": 16, "algo": "multi_kernel"}, + {"itopk": 128, "search_width": 2, "max_iterations": 64, "algo": "multi_kernel"}, + {"itopk": 192, "search_width": 8, 
"max_iterations": 24, "algo": "multi_kernel"}, + {"itopk": 192, "search_width": 2, "max_iterations": 96, "algo": "multi_kernel"}, + {"itopk": 256, "search_width": 8, "max_iterations": 32, "algo": "multi_kernel"}, + {"itopk": 384, "search_width": 8, "max_iterations": 48, "algo": "multi_kernel"}, + {"itopk": 512, "search_width": 8, "max_iterations": 64, "algo": "multi_kernel"} + ] + }, + { + "name": "raft_cagra.dim64", + "algo": "raft_cagra", + "dataset_memtype":"host", + "build_param": {"index_dim": 64}, + "file": "deep-100M/raft_cagra/dim64", + "search_params": [ + {"itopk": 32, "search_width": 1, "max_iterations": 0}, + {"itopk": 32, "search_width": 1, "max_iterations": 32}, + {"itopk": 64, "search_width": 4, "max_iterations": 16}, + {"itopk": 64, "search_width": 1, "max_iterations": 64}, + {"itopk": 96, "search_width": 2, "max_iterations": 48}, + {"itopk": 128, "search_width": 8, "max_iterations": 16}, + {"itopk": 128, "search_width": 2, "max_iterations": 64}, + {"itopk": 192, "search_width": 8, "max_iterations": 24}, + {"itopk": 192, "search_width": 2, "max_iterations": 96}, + {"itopk": 256, "search_width": 8, "max_iterations": 32}, + {"itopk": 384, "search_width": 8, "max_iterations": 48}, + {"itopk": 512, "search_width": 8, "max_iterations": 64} + ] + } + ] +} diff --git a/python/raft-ann-bench/raft-ann-bench/run/conf/deep-1B.json b/python/raft-ann-bench/raft-ann-bench/run/conf/deep-1B.json new file mode 100644 index 0000000000..632d2f7308 --- /dev/null +++ b/python/raft-ann-bench/raft-ann-bench/run/conf/deep-1B.json @@ -0,0 +1,34 @@ +{ + "dataset": { + "name": "deep-1B", + "base_file": "deep-1B/base.1B.fbin", + "query_file": "deep-1B/query.public.10K.fbin", + "groundtruth_neighbors_file": "deep-1B/groundtruth.neighbors.ibin", + "distance": "inner_product" + }, + + "search_basic_param": { + "batch_size": 10000, + "k": 10 + }, + + "index": [ + { + "name": "faiss_ivf_pq.M48-nlist50K", + "algo": "faiss_gpu_ivf_pq", + "build_param": {"nlist":50000, "M":48}, + "file": "deep-1B/faiss_ivf_pq/M48-nlist50K", + "search_params": [ + {"nprobe":1}, + {"nprobe":5}, + {"nprobe":10}, + {"nprobe":50}, + {"nprobe":100}, + {"nprobe":200}, + {"nprobe":500}, + {"nprobe":1000}, + {"nprobe":2000} + ] + } + ] +} diff --git a/bench/ann/conf/deep-image-96-angular.json b/python/raft-ann-bench/raft-ann-bench/run/conf/deep-image-96-angular.json similarity index 100% rename from bench/ann/conf/deep-image-96-angular.json rename to python/raft-ann-bench/raft-ann-bench/run/conf/deep-image-96-angular.json diff --git a/bench/ann/conf/fashion-mnist-784-euclidean.json b/python/raft-ann-bench/raft-ann-bench/run/conf/fashion-mnist-784-euclidean.json similarity index 100% rename from bench/ann/conf/fashion-mnist-784-euclidean.json rename to python/raft-ann-bench/raft-ann-bench/run/conf/fashion-mnist-784-euclidean.json diff --git a/bench/ann/conf/gist-960-euclidean.json b/python/raft-ann-bench/raft-ann-bench/run/conf/gist-960-euclidean.json similarity index 100% rename from bench/ann/conf/gist-960-euclidean.json rename to python/raft-ann-bench/raft-ann-bench/run/conf/gist-960-euclidean.json diff --git a/bench/ann/conf/glove-100-angular.json b/python/raft-ann-bench/raft-ann-bench/run/conf/glove-100-angular.json similarity index 100% rename from bench/ann/conf/glove-100-angular.json rename to python/raft-ann-bench/raft-ann-bench/run/conf/glove-100-angular.json diff --git a/bench/ann/conf/glove-100-inner.json b/python/raft-ann-bench/raft-ann-bench/run/conf/glove-100-inner.json similarity index 56% rename from 
bench/ann/conf/glove-100-inner.json rename to python/raft-ann-bench/raft-ann-bench/run/conf/glove-100-inner.json index 5d0bbf970c..7c95ceb439 100644 --- a/bench/ann/conf/glove-100-inner.json +++ b/python/raft-ann-bench/raft-ann-bench/run/conf/glove-100-inner.json @@ -1,24 +1,24 @@ { - "dataset" : { - "name" : "glove-100-inner", - "base_file" : "data/glove-100-inner/base.fbin", - "query_file" : "data/glove-100-inner/query.fbin", - "distance" : "inner_product" + "dataset": { + "name": "glove-100-inner", + "base_file": "glove-100-inner/base.fbin", + "query_file": "glove-100-inner/query.fbin", + "groundtruth_neighbors_file": "glove-100-inner/groundtruth.neighbors.ibin", + "distance": "inner_product" }, - "search_basic_param" : { - "batch_size" : 1, - "k" : 10, - "run_count" : 3 + "search_basic_param": { + "batch_size": 1, + "k": 10 }, - "index" : [ + "index": [ { - "name" : "hnswlib.M4", - "algo" : "hnswlib", + "name": "hnswlib.M4", + "algo": "hnswlib", "build_param": {"M":4, "efConstruction":500, "numThreads":4}, - "file" : "index/glove-100-inner/hnswlib/M4", - "search_params" : [ + "file": "glove-100-inner/hnswlib/M4", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -28,16 +28,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/glove-100-inner/hnswlib/M4" + ] }, - { - "name" : "hnswlib.M8", - "algo" : "hnswlib", + "name": "hnswlib.M8", + "algo": "hnswlib", "build_param": {"M":8, "efConstruction":500, "numThreads":4}, - "file" : "index/glove-100-inner/hnswlib/M8", - "search_params" : [ + "file": "glove-100-inner/hnswlib/M8", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -47,16 +45,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/glove-100-inner/hnswlib/M8" + ] }, - { - "name" : "hnswlib.M12", - "algo" : "hnswlib", + "name": "hnswlib.M12", + "algo": "hnswlib", "build_param": {"M":12, "efConstruction":500, "numThreads":4}, - "file" : "index/glove-100-inner/hnswlib/M12", - "search_params" : [ + "file": "glove-100-inner/hnswlib/M12", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -66,16 +62,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/glove-100-inner/hnswlib/M12" + ] }, - { - "name" : "hnswlib.M16", - "algo" : "hnswlib", + "name": "hnswlib.M16", + "algo": "hnswlib", "build_param": {"M":16, "efConstruction":500, "numThreads":4}, - "file" : "index/glove-100-inner/hnswlib/M16", - "search_params" : [ + "file": "glove-100-inner/hnswlib/M16", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -85,16 +79,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/glove-100-inner/hnswlib/M16" + ] }, - { - "name" : "hnswlib.M24", - "algo" : "hnswlib", + "name": "hnswlib.M24", + "algo": "hnswlib", "build_param": {"M":24, "efConstruction":500, "numThreads":4}, - "file" : "index/glove-100-inner/hnswlib/M24", - "search_params" : [ + "file": "glove-100-inner/hnswlib/M24", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -104,16 +96,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - 
"search_result_file" : "result/glove-100-inner/hnswlib/M24" + ] }, - { - "name" : "hnswlib.M36", - "algo" : "hnswlib", + "name": "hnswlib.M36", + "algo": "hnswlib", "build_param": {"M":36, "efConstruction":500, "numThreads":4}, - "file" : "index/glove-100-inner/hnswlib/M36", - "search_params" : [ + "file": "glove-100-inner/hnswlib/M36", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -123,16 +113,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/glove-100-inner/hnswlib/M36" + ] }, - { - "name" : "hnswlib.M48", - "algo" : "hnswlib", + "name": "hnswlib.M48", + "algo": "hnswlib", "build_param": {"M":48, "efConstruction":500, "numThreads":4}, - "file" : "index/glove-100-inner/hnswlib/M48", - "search_params" : [ + "file": "glove-100-inner/hnswlib/M48", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -142,16 +130,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/glove-100-inner/hnswlib/M48" + ] }, - { - "name" : "hnswlib.M64", - "algo" : "hnswlib", + "name": "hnswlib.M64", + "algo": "hnswlib", "build_param": {"M":64, "efConstruction":500, "numThreads":4}, - "file" : "index/glove-100-inner/hnswlib/M64", - "search_params" : [ + "file": "glove-100-inner/hnswlib/M64", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -161,16 +147,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/glove-100-inner/hnswlib/M64" + ] }, - { - "name" : "hnswlib.M96", - "algo" : "hnswlib", + "name": "hnswlib.M96", + "algo": "hnswlib", "build_param": {"M":96, "efConstruction":500, "numThreads":4}, - "file" : "index/glove-100-inner/hnswlib/M96", - "search_params" : [ + "file": "glove-100-inner/hnswlib/M96", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -180,16 +164,14 @@ {"ef":400, "numThreads":1}, {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} - ], - "search_result_file" : "result/glove-100-inner/hnswlib/M96" + ] }, - { - "name" : "faiss_ivf_flat.nlist1024", - "algo" : "faiss_gpu_ivf_flat", + "name": "faiss_ivf_flat.nlist1024", + "algo": "faiss_gpu_ivf_flat", "build_param": {"nlist":1024}, - "file" : "index/glove-100-inner/faiss_ivf_flat/nlist1024", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_flat/nlist1024", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -198,16 +180,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_flat/nlist1024" + ] }, - { - "name" : "faiss_ivf_flat.nlist2048", - "algo" : "faiss_gpu_ivf_flat", + "name": "faiss_ivf_flat.nlist2048", + "algo": "faiss_gpu_ivf_flat", "build_param": {"nlist":2048}, - "file" : "index/glove-100-inner/faiss_ivf_flat/nlist2048", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_flat/nlist2048", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -216,16 +196,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_flat/nlist2048" + ] }, - { - "name" : "faiss_ivf_flat.nlist4096", - "algo" : "faiss_gpu_ivf_flat", + "name": "faiss_ivf_flat.nlist4096", + "algo": "faiss_gpu_ivf_flat", "build_param": {"nlist":4096}, - "file" : 
"index/glove-100-inner/faiss_ivf_flat/nlist4096", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_flat/nlist4096", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -234,16 +212,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_flat/nlist4096" + ] }, - { - "name" : "faiss_ivf_flat.nlist8192", - "algo" : "faiss_gpu_ivf_flat", + "name": "faiss_ivf_flat.nlist8192", + "algo": "faiss_gpu_ivf_flat", "build_param": {"nlist":8192}, - "file" : "index/glove-100-inner/faiss_ivf_flat/nlist8192", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_flat/nlist8192", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -252,16 +228,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_flat/nlist8192" + ] }, - { - "name" : "faiss_ivf_flat.nlist16384", - "algo" : "faiss_gpu_ivf_flat", + "name": "faiss_ivf_flat.nlist16384", + "algo": "faiss_gpu_ivf_flat", "build_param": {"nlist":16384}, - "file" : "index/glove-100-inner/faiss_ivf_flat/nlist16384", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_flat/nlist16384", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -271,18 +245,17 @@ {"nprobe":500}, {"nprobe":1000}, {"nprobe":2000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_flat/nlist16384" + ] }, { - "name" : "faiss_ivf_pq.M2-nlist1024", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M2-nlist1024", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":1024, "M":2}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M2-nlist1024", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M2-nlist1024", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -291,16 +264,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M2-nlist1024" + ] }, - { - "name" : "faiss_ivf_pq.M2-nlist2048", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M2-nlist2048", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":2048, "M":2}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M2-nlist2048", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M2-nlist2048", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -309,16 +280,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M2-nlist2048" + ] }, - { - "name" : "faiss_ivf_pq.M2-nlist4096", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M2-nlist4096", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":4096, "M":2}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M2-nlist4096", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M2-nlist4096", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -327,16 +296,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M2-nlist4096" + ] }, - { - "name" : "faiss_ivf_pq.M2-nlist8192", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M2-nlist8192", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":8192, "M":2}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M2-nlist8192", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M2-nlist8192", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -345,16 +312,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : 
"result/glove-100-inner/faiss_ivf_pq/M2-nlist8192" + ] }, - { - "name" : "faiss_ivf_pq.M2-nlist16384", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M2-nlist16384", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":16384, "M":2}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M2-nlist16384", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M2-nlist16384", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -364,16 +329,14 @@ {"nprobe":500}, {"nprobe":1000}, {"nprobe":2000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M2-nlist16384" + ] }, - { - "name" : "faiss_ivf_pq.M4-nlist1024", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M4-nlist1024", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":1024, "M":4}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M4-nlist1024", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M4-nlist1024", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -382,16 +345,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M4-nlist1024" + ] }, - { - "name" : "faiss_ivf_pq.M4-nlist2048", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M4-nlist2048", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":2048, "M":4}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M4-nlist2048", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M4-nlist2048", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -400,16 +361,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M4-nlist2048" + ] }, - { - "name" : "faiss_ivf_pq.M4-nlist4096", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M4-nlist4096", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":4096, "M":4}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M4-nlist4096", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M4-nlist4096", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -418,16 +377,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M4-nlist4096" + ] }, - { - "name" : "faiss_ivf_pq.M4-nlist8192", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M4-nlist8192", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":8192, "M":4}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M4-nlist8192", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M4-nlist8192", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -436,16 +393,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M4-nlist8192" + ] }, - { - "name" : "faiss_ivf_pq.M4-nlist16384", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M4-nlist16384", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":16384, "M":4}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M4-nlist16384", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M4-nlist16384", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -455,16 +410,14 @@ {"nprobe":500}, {"nprobe":1000}, {"nprobe":2000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M4-nlist16384" + ] }, - { - "name" : "faiss_ivf_pq.M20-nlist1024", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M20-nlist1024", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":1024, "M":20}, - "file" : 
"index/glove-100-inner/faiss_ivf_pq/M20-nlist1024", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M20-nlist1024", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -473,16 +426,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M20-nlist1024" + ] }, - { - "name" : "faiss_ivf_pq.M20-nlist2048", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M20-nlist2048", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":2048, "M":20}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M20-nlist2048", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M20-nlist2048", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -491,16 +442,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M20-nlist2048" + ] }, - { - "name" : "faiss_ivf_pq.M20-nlist4096", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M20-nlist4096", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":4096, "M":20}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M20-nlist4096", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M20-nlist4096", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -509,16 +458,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M20-nlist4096" + ] }, - { - "name" : "faiss_ivf_pq.M20-nlist8192", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M20-nlist8192", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":8192, "M":20}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M20-nlist8192", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M20-nlist8192", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -527,16 +474,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M20-nlist8192" + ] }, - { - "name" : "faiss_ivf_pq.M20-nlist16384", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M20-nlist16384", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":16384, "M":20}, - "file" : "index/glove-100-inner/faiss_ivf_pq/M20-nlist16384", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_pq/M20-nlist16384", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -546,17 +491,16 @@ {"nprobe":500}, {"nprobe":1000}, {"nprobe":2000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_pq/M20-nlist16384" + ] }, { - "name" : "faiss_ivf_sq.nlist1024-fp16", - "algo" : "faiss_gpu_ivf_sq", + "name": "faiss_ivf_sq.nlist1024-fp16", + "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist":1024, "quantizer_type":"fp16"}, - "file" : "index/glove-100-inner/faiss_ivf_sq/nlist1024-fp16", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_sq/nlist1024-fp16", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -565,16 +509,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist1024-fp16" + ] }, - { - "name" : "faiss_ivf_sq.nlist2048-fp16", - "algo" : "faiss_gpu_ivf_sq", + "name": "faiss_ivf_sq.nlist2048-fp16", + "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist":2048, "quantizer_type":"fp16"}, - "file" : "index/glove-100-inner/faiss_ivf_sq/nlist2048-fp16", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_sq/nlist2048-fp16", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -583,16 +525,14 
@@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist2048-fp16" + ] }, - { - "name" : "faiss_ivf_sq.nlist4096-fp16", - "algo" : "faiss_gpu_ivf_sq", + "name": "faiss_ivf_sq.nlist4096-fp16", + "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist":4096, "quantizer_type":"fp16"}, - "file" : "index/glove-100-inner/faiss_ivf_sq/nlist4096-fp16", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_sq/nlist4096-fp16", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -601,16 +541,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist4096-fp16" + ] }, - { - "name" : "faiss_ivf_sq.nlist8192-fp16", - "algo" : "faiss_gpu_ivf_sq", + "name": "faiss_ivf_sq.nlist8192-fp16", + "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist":8192, "quantizer_type":"fp16"}, - "file" : "index/glove-100-inner/faiss_ivf_sq/nlist8192-fp16", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_sq/nlist8192-fp16", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -619,16 +557,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist8192-fp16" + ] }, - { - "name" : "faiss_ivf_sq.nlist16384-fp16", - "algo" : "faiss_gpu_ivf_sq", + "name": "faiss_ivf_sq.nlist16384-fp16", + "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist":16384, "quantizer_type":"fp16"}, - "file" : "index/glove-100-inner/faiss_ivf_sq/nlist16384-fp16", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_sq/nlist16384-fp16", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -638,17 +574,14 @@ {"nprobe":500}, {"nprobe":1000}, {"nprobe":2000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist16384-fp16" + ] }, - - { - "name" : "faiss_ivf_sq.nlist1024-int8", - "algo" : "faiss_gpu_ivf_sq", + "name": "faiss_ivf_sq.nlist1024-int8", + "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist":1024, "quantizer_type":"int8"}, - "file" : "index/glove-100-inner/faiss_ivf_sq/nlist1024-int8", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_sq/nlist1024-int8", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -657,16 +590,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist1024-int8" + ] }, - { - "name" : "faiss_ivf_sq.nlist2048-int8", - "algo" : "faiss_gpu_ivf_sq", + "name": "faiss_ivf_sq.nlist2048-int8", + "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist":2048, "quantizer_type":"int8"}, - "file" : "index/glove-100-inner/faiss_ivf_sq/nlist2048-int8", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_sq/nlist2048-int8", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -675,16 +606,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist2048-int8" + ] }, - { - "name" : "faiss_ivf_sq.nlist4096-int8", - "algo" : "faiss_gpu_ivf_sq", + "name": "faiss_ivf_sq.nlist4096-int8", + "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist":4096, "quantizer_type":"int8"}, - "file" : "index/glove-100-inner/faiss_ivf_sq/nlist4096-int8", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_sq/nlist4096-int8", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -693,16 +622,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : 
"result/glove-100-inner/faiss_ivf_sq/nlist4096-int8" + ] }, - { - "name" : "faiss_ivf_sq.nlist8192-int8", - "algo" : "faiss_gpu_ivf_sq", + "name": "faiss_ivf_sq.nlist8192-int8", + "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist":8192, "quantizer_type":"int8"}, - "file" : "index/glove-100-inner/faiss_ivf_sq/nlist8192-int8", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_sq/nlist8192-int8", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -711,16 +638,14 @@ {"nprobe":200}, {"nprobe":500}, {"nprobe":1000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist8192-int8" + ] }, - { - "name" : "faiss_ivf_sq.nlist16384-int8", - "algo" : "faiss_gpu_ivf_sq", + "name": "faiss_ivf_sq.nlist16384-int8", + "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist":16384, "quantizer_type":"int8"}, - "file" : "index/glove-100-inner/faiss_ivf_sq/nlist16384-int8", - "search_params" : [ + "file": "glove-100-inner/faiss_ivf_sq/nlist16384-int8", + "search_params": [ {"nprobe":1}, {"nprobe":5}, {"nprobe":10}, @@ -730,22 +655,18 @@ {"nprobe":500}, {"nprobe":1000}, {"nprobe":2000} - ], - "search_result_file" : "result/glove-100-inner/faiss_ivf_sq/nlist16384-int8" + ] }, - { - "name" : "faiss_flat", - "algo" : "faiss_gpu_flat", + "name": "faiss_flat", + "algo": "faiss_gpu_flat", "build_param": {}, - "file" : "index/glove-100-inner/faiss_flat/flat", - "search_params" : [{}], - "search_result_file" : "result/glove-100-inner/faiss_flat/flat" + "file": "glove-100-inner/faiss_flat/flat", + "search_params": [{}] }, - { - "name" : "ggnn.kbuild96-segment64-refine2-k10", - "algo" : "ggnn", + "name": "ggnn.kbuild96-segment64-refine2-k10", + "algo": "ggnn", "build_param": { "k_build": 96, "segment_size": 64, @@ -753,8 +674,8 @@ "dataset_size": 1183514, "k": 10 }, - "file" : "index/glove-100-inner/ggnn/kbuild96-segment64-refine2-k10", - "search_params" : [ + "file": "glove-100-inner/ggnn/kbuild96-segment64-refine2-k10", + "search_params": [ {"tau":0.001, "block_dim":64, "sorted_size":32}, {"tau":0.005, "block_dim":64, "sorted_size":32}, {"tau":0.01, "block_dim":64, "sorted_size":32}, @@ -786,8 +707,7 @@ {"tau":0.3, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, {"tau":0.4, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32}, {"tau":0.5, "block_dim":128, "max_iterations":2000, "cache_size":1024, "sorted_size":32} - - ], - "search_result_file" : "result/glove-100-inner/ggnn/kbuild96-segment64-refine2-k10" - }] + ] + } + ] } diff --git a/bench/ann/conf/glove-50-angular.json b/python/raft-ann-bench/raft-ann-bench/run/conf/glove-50-angular.json similarity index 100% rename from bench/ann/conf/glove-50-angular.json rename to python/raft-ann-bench/raft-ann-bench/run/conf/glove-50-angular.json diff --git a/bench/ann/conf/lastfm-65-angular.json b/python/raft-ann-bench/raft-ann-bench/run/conf/lastfm-65-angular.json similarity index 100% rename from bench/ann/conf/lastfm-65-angular.json rename to python/raft-ann-bench/raft-ann-bench/run/conf/lastfm-65-angular.json diff --git a/bench/ann/conf/mnist-784-euclidean.json b/python/raft-ann-bench/raft-ann-bench/run/conf/mnist-784-euclidean.json similarity index 100% rename from bench/ann/conf/mnist-784-euclidean.json rename to python/raft-ann-bench/raft-ann-bench/run/conf/mnist-784-euclidean.json diff --git a/bench/ann/conf/nytimes-256-angular.json b/python/raft-ann-bench/raft-ann-bench/run/conf/nytimes-256-angular.json similarity index 100% rename from bench/ann/conf/nytimes-256-angular.json 
rename to python/raft-ann-bench/raft-ann-bench/run/conf/nytimes-256-angular.json diff --git a/python/raft-ann-bench/raft-ann-bench/run/conf/sift-128-euclidean.json b/python/raft-ann-bench/raft-ann-bench/run/conf/sift-128-euclidean.json new file mode 100644 index 0000000000..116ea8d557 --- /dev/null +++ b/python/raft-ann-bench/raft-ann-bench/run/conf/sift-128-euclidean.json @@ -0,0 +1,498 @@ +{ + "dataset": { + "name": "sift-128-euclidean", + "base_file": "sift-128-euclidean/base.fbin", + "query_file": "sift-128-euclidean/query.fbin", + "groundtruth_neighbors_file": "sift-128-euclidean/groundtruth.neighbors.ibin", + "distance": "euclidean" + }, + + "search_basic_param": { + "batch_size": 5000, + "k": 10 + }, + + "index": [ + { + "name": "hnswlib.M12", + "algo": "hnswlib", + "build_param": {"M":12, "efConstruction":500, "numThreads":32}, + "file": "sift-128-euclidean/hnswlib/M12", + "search_params": [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ] + }, + { + "name": "hnswlib.M16", + "algo": "hnswlib", + "build_param": {"M":16, "efConstruction":500, "numThreads":32}, + "file": "sift-128-euclidean/hnswlib/M16", + "search_params": [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ] + }, + { + "name": "hnswlib.M24", + "algo": "hnswlib", + "build_param": {"M":24, "efConstruction":500, "numThreads":32}, + "file": "sift-128-euclidean/hnswlib/M24", + "search_params": [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ] + }, + { + "name": "hnswlib.M36", + "algo": "hnswlib", + "build_param": {"M":36, "efConstruction":500, "numThreads":32}, + "file": "sift-128-euclidean/hnswlib/M36", + "search_params": [ + {"ef":10, "numThreads":1}, + {"ef":20, "numThreads":1}, + {"ef":40, "numThreads":1}, + {"ef":60, "numThreads":1}, + {"ef":80, "numThreads":1}, + {"ef":120, "numThreads":1}, + {"ef":200, "numThreads":1}, + {"ef":400, "numThreads":1}, + {"ef":600, "numThreads":1}, + {"ef":800, "numThreads":1} + ] + }, + { + "name": "raft_bfknn", + "algo": "raft_bfknn", + "build_param": {}, + "file": "sift-128-euclidean/raft_bfknn/bfknn", + "search_params": [{"probe": 1}] + }, + { + "name": "faiss_ivf_flat.nlist1024", + "algo": "faiss_gpu_ivf_flat", + "build_param": {"nlist": 1024}, + "file": "sift-128-euclidean/faiss_ivf_flat/nlist1024", + "search_params": [ + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] + }, + { + "name": "faiss_ivf_flat.nlist2048", + "algo": "faiss_gpu_ivf_flat", + "build_param": {"nlist": 2048}, + "file": "sift-128-euclidean/faiss_ivf_flat/nlist2048", + "search_params": [ + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] + }, + { + "name": "faiss_ivf_flat.nlist4096", + 
"algo": "faiss_gpu_ivf_flat", + "build_param": {"nlist": 4096}, + "file": "sift-128-euclidean/faiss_ivf_flat/nlist4096", + "search_params": [ + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] + }, + { + "name": "faiss_ivf_flat.nlist8192", + "algo": "faiss_gpu_ivf_flat", + "build_param": {"nlist": 8192}, + "file": "sift-128-euclidean/faiss_ivf_flat/nlist8192", + "search_params": [ + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] + }, + { + "name": "faiss_ivf_flat.nlist16384", + "algo": "faiss_gpu_ivf_flat", + "build_param": {"nlist": 16384}, + "file": "sift-128-euclidean/faiss_ivf_flat/nlist16384", + "search_params": [ + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000}, + {"nprobe": 2000} + ] + }, + { + "name": "faiss_ivf_pq.M64-nlist1024", + "algo": "faiss_gpu_ivf_pq", + "build_param": {"nlist": 1024, "M": 64, "useFloat16": true, "usePrecomputed": true}, + "file": "sift-128-euclidean/faiss_ivf_pq/M64-nlist1024", + "search_params": [ + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] + }, + { + "name": "faiss_ivf_pq.M64-nlist1024.noprecomp", + "algo": "faiss_gpu_ivf_pq", + "build_param": { + "nlist": 1024, + "M": 64, + "useFloat16": true, + "usePrecomputed": false + }, + "file": "sift-128-euclidean/faiss_ivf_pq/M64-nlist1024.noprecomp", + "search_params": [ + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] + }, + { + "name": "faiss_ivf_sq.nlist1024-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": {"nlist": 1024, "quantizer_type": "fp16"}, + "file": "sift-128-euclidean/faiss_ivf_sq/nlist1024-fp16", + "search_params": [ + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] + }, + { + "name": "faiss_ivf_sq.nlist2048-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": {"nlist": 2048, "quantizer_type": "fp16"}, + "file": "sift-128-euclidean/faiss_ivf_sq/nlist2048-fp16", + "search_params": [ + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] + }, + { + "name": "faiss_ivf_sq.nlist4096-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": {"nlist": 4096, "quantizer_type": "fp16"}, + "file": "sift-128-euclidean/faiss_ivf_sq/nlist4096-fp16", + "search_params": [ + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] + }, + { + "name": "faiss_ivf_sq.nlist8192-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": {"nlist": 8192, "quantizer_type": "fp16"}, + "file": "sift-128-euclidean/faiss_ivf_sq/nlist8192-fp16", + "search_params": [ + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] + }, + { + "name": "faiss_ivf_sq.nlist16384-fp16", + "algo": "faiss_gpu_ivf_sq", + "build_param": {"nlist": 16384, "quantizer_type": "fp16"}, + "file": "sift-128-euclidean/faiss_ivf_sq/nlist16384-fp16", + "search_params": [ + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + 
{"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000}, + {"nprobe": 2000} + ] + }, + { + "name": "faiss_ivf_sq.nlist1024-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": {"nlist": 1024, "quantizer_type": "int8"}, + "file": "sift-128-euclidean/faiss_ivf_sq/nlist1024-int8", + "search_params": [ + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] + }, + { + "name": "faiss_ivf_sq.nlist2048-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": {"nlist": 2048,"quantizer_type": "int8"}, + "file": "sift-128-euclidean/faiss_ivf_sq/nlist2048-int8", + "search_params": [ + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] + }, + { + "name": "faiss_ivf_sq.nlist4096-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": {"nlist": 4096, "quantizer_type": "int8"}, + "file": "sift-128-euclidean/faiss_ivf_sq/nlist4096-int8", + "search_params": [ + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] + }, + { + "name": "faiss_ivf_sq.nlist8192-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": {"nlist": 8192, "quantizer_type": "int8"}, + "file": "sift-128-euclidean/faiss_ivf_sq/nlist8192-int8", + "search_params": [ + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] + }, + { + "name": "faiss_ivf_sq.nlist16384-int8", + "algo": "faiss_gpu_ivf_sq", + "build_param": {"nlist": 16384, "quantizer_type": "int8"}, + "file": "sift-128-euclidean/faiss_ivf_sq/nlist16384-int8", + "search_params": [ + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000}, + {"nprobe": 2000} + ] + }, + { + "name": "faiss_flat", + "algo": "faiss_gpu_flat", + "build_param": {}, + "file": "sift-128-euclidean/faiss_flat/flat", + "search_params": [{}] + }, + { + "name": "raft_ivf_pq.dimpq64-bitpq8-cluster1K", + "algo": "raft_ivf_pq", + "build_param": {"niter": 25, "nlist": 1000, "pq_dim": 64, "pq_bits": 8, "ratio": 1}, + "file": "sift-128-euclidean/raft_ivf_pq/dimpq64-bitpq8-cluster1K", + "search_params": [ + { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { 
"nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 1000, "internalDistanceDtype": "half", "smemLutDtype": "half" } + ] + }, + { + "name": "raft_ivf_pq.dimpq128-bitpq6-cluster1K", + "algo": "raft_ivf_pq", + "build_param": {"niter": 25, "nlist": 1000, "pq_dim": 128, "pq_bits": 6, "ratio": 1}, + "file": "sift-128-euclidean/raft_ivf_pq/dimpq128-bitpq6-cluster1K", + "search_params": [ + { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "float" }, + { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 40, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 100, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 200, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 1000, "internalDistanceDtype": "float", "smemLutDtype": "fp8" }, + { "nprobe": 20, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 30, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 40, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 100, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 200, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half" }, + { "nprobe": 1000, "internalDistanceDtype": "half", "smemLutDtype": "half" } + ] + }, + { + "name": "raft_ivf_flat.nlist1024", + "algo": "raft_ivf_flat", + "build_param": {"nlist": 1024, "ratio": 1, "niter": 25}, + "file": "sift-128-euclidean/raft_ivf_flat/nlist1024", + "search_params": [ + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + {"nprobe": 1000} + ] + }, + { + "name": "raft_ivf_flat.nlist16384", + "algo": "raft_ivf_flat", + "build_param": {"nlist": 16384, "ratio": 2, "niter": 20}, + "file": "sift-128-euclidean/raft_ivf_flat/nlist16384", + "search_params": [ + {"nprobe": 1}, + {"nprobe": 5}, + {"nprobe": 10}, + {"nprobe": 50}, + {"nprobe": 100}, + {"nprobe": 200}, + {"nprobe": 500}, + 
{"nprobe": 1000}, + {"nprobe": 2000} + ] + }, + { + "name": "raft_cagra.dim32", + "algo": "raft_cagra", + "build_param": {"index_dim": 32}, + "file": "sift-128-euclidean/raft_cagra/dim32", + "search_params": [ + {"itopk": 32}, + {"itopk": 64}, + {"itopk": 128} + ] + }, + { + "name": "raft_cagra.dim64", + "algo": "raft_cagra", + "build_param": {"index_dim": 64}, + "file": "sift-128-euclidean/raft_cagra/dim64", + "search_params": [ + {"itopk": 32}, + {"itopk": 64}, + {"itopk": 128} + ] + } + ] +} diff --git a/bench/ann/split_groundtruth.py b/python/raft-ann-bench/raft-ann-bench/split_groundtruth/__main__.py similarity index 63% rename from bench/ann/split_groundtruth.py rename to python/raft-ann-bench/raft-ann-bench/split_groundtruth/__main__.py index cd67d9c8b8..161617f85c 100644 --- a/bench/ann/split_groundtruth.py +++ b/python/raft-ann-bench/raft-ann-bench/split_groundtruth/__main__.py @@ -19,25 +19,26 @@ def split_groundtruth(groundtruth_filepath): - ann_bench_scripts_dir = os.path.join(os.getenv("RAFT_HOME"), - "cpp/bench/ann/scripts") - ann_bench_scripts_path = os.path.join(ann_bench_scripts_dir, - "split_groundtruth.pl") + ann_bench_scripts_path = "split_groundtruth.pl" pwd = os.getcwd() os.chdir("/".join(groundtruth_filepath.split("/")[:-1])) groundtruth_filename = groundtruth_filepath.split("/")[-1] - p = subprocess.Popen([ann_bench_scripts_path, groundtruth_filename, - "groundtruth"]) + p = subprocess.Popen( + [ann_bench_scripts_path, groundtruth_filename, "groundtruth"] + ) p.wait() os.chdir(pwd) def main(): parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument("--groundtruth", - help="Path to billion-scale dataset groundtruth file", - required=True) + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument( + "--groundtruth", + help="Path to billion-scale dataset groundtruth file", + required=True, + ) args = parser.parse_args() split_groundtruth(args.groundtruth) diff --git a/cpp/bench/ann/scripts/split_groundtruth.pl b/python/raft-ann-bench/raft-ann-bench/split_groundtruth/split_groundtruth.pl similarity index 100% rename from cpp/bench/ann/scripts/split_groundtruth.pl rename to python/raft-ann-bench/raft-ann-bench/split_groundtruth/split_groundtruth.pl diff --git a/python/raft-dask/pyproject.toml b/python/raft-dask/pyproject.toml index bdbcf61e0f..3c81b6f16b 100644 --- a/python/raft-dask/pyproject.toml +++ b/python/raft-dask/pyproject.toml @@ -35,8 +35,8 @@ license = { text = "Apache 2.0" } requires-python = ">=3.9" dependencies = [ "dask-cuda==23.10.*", - "dask>=2023.7.1", - "distributed>=2023.7.1", + "dask==2023.7.1", + "distributed==2023.7.1", "joblib>=0.11", "numba>=0.57", "numpy>=1.21", From 697ab89e0b9840e878af11c6eb7c68037e3f0a29 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Mon, 28 Aug 2023 15:35:37 -0500 Subject: [PATCH 39/70] FIX Add openmp changes from main branch --- cpp/CMakeLists.txt | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index ecb74ad306..0b009cce93 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -143,12 +143,15 @@ endif() rapids_cuda_init_runtime(USE_STATIC ${CUDA_STATIC_RUNTIME}) if(NOT DISABLE_OPENMP) - find_package(OpenMP) + rapids_find_package( + OpenMP REQUIRED + BUILD_EXPORT_SET raft-exports + INSTALL_EXPORT_SET raft-exports + ) if(OPENMP_FOUND) message(VERBOSE "RAFT: OpenMP found in ${OpenMP_CXX_INCLUDE_DIRS}") endif() endif() - # * find CUDAToolkit package # * determine GPU 
architectures # * enable the CMake CUDA language From 8b0c4c2fd1e75c1dc6502299dddcb13248ec31af Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Mon, 28 Aug 2023 16:41:20 -0500 Subject: [PATCH 40/70] FIX recipe env variables --- conda/recipes/raft-ann-bench-cpu/meta.yaml | 3 +-- conda/recipes/raft-ann-bench/meta.yaml | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/conda/recipes/raft-ann-bench-cpu/meta.yaml b/conda/recipes/raft-ann-bench-cpu/meta.yaml index c0450b9e8a..355ea640ff 100644 --- a/conda/recipes/raft-ann-bench-cpu/meta.yaml +++ b/conda/recipes/raft-ann-bench-cpu/meta.yaml @@ -4,9 +4,8 @@ # conda build . -c conda-forge -c nvidia -c rapidsai {% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %} {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} +{% set py_version = environ['CONDA_PY'] %} {% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %} -{% set cuda_major = cuda_version.split('.')[0] %} -{% set cuda_spec = ">=" + cuda_major ~ ",<" + (cuda_major | int + 1) ~ ".0a0" %} # i.e. >=11,<12.0a0 {% set date_string = environ['RAPIDS_DATE_STRING'] %} package: diff --git a/conda/recipes/raft-ann-bench/meta.yaml b/conda/recipes/raft-ann-bench/meta.yaml index 6e5580dad2..882ff6cc49 100644 --- a/conda/recipes/raft-ann-bench/meta.yaml +++ b/conda/recipes/raft-ann-bench/meta.yaml @@ -4,6 +4,7 @@ # conda build . -c conda-forge -c nvidia -c rapidsai {% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev').lstrip('v') %} {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} +{% set py_version = environ['CONDA_PY'] %} {% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %} {% set cuda_major = cuda_version.split('.')[0] %} {% set cuda_spec = ">=" + cuda_major ~ ",<" + (cuda_major | int + 1) ~ ".0a0" %} # i.e. 
>=11,<12.0a0 From 49fd31d5e18f678810e8e4a96243c4bdcbb6750c Mon Sep 17 00:00:00 2001 From: divyegala Date: Mon, 28 Aug 2023 16:41:17 -0700 Subject: [PATCH 41/70] adding build time plot --- bench/ann/plot.py | 217 +++++++++++++++++++++++++++++++++++++--------- bench/ann/run.py | 12 +-- 2 files changed, 181 insertions(+), 48 deletions(-) diff --git a/bench/ann/plot.py b/bench/ann/plot.py index c8fe947684..99c3808386 100644 --- a/bench/ann/plot.py +++ b/bench/ann/plot.py @@ -23,9 +23,11 @@ mpl.use("Agg") # noqa import argparse +from collections import OrderedDict import itertools import matplotlib.pyplot as plt import numpy as np +import pandas as pd import os @@ -42,6 +44,16 @@ } } +def positive_int(input_str: str) -> int: + try: + i = int(input_str) + if i < 1: + raise ValueError + except ValueError: + raise argparse.ArgumentTypeError(f"{input_str} is not a positive integer") + + return i + def generate_n_colors(n): vs = np.linspace(0.3, 0.9, 7) @@ -76,42 +88,35 @@ def get_left_right(metric): return "right" -def get_plot_label(xm, ym): - template = "%(xlabel)s-%(ylabel)s tradeoff - %(updown)s and" " to the %(leftright)s is better" - return template % { - "xlabel": xm["description"], - "ylabel": ym["description"], - "updown": get_up_down(ym), - "leftright": get_left_right(xm), - } - - def create_pointset(data, xn, yn): xm, ym = (metrics[xn], metrics[yn]) rev_y = -1 if ym["worst"] < 0 else 1 rev_x = -1 if xm["worst"] < 0 else 1 data.sort(key=lambda t: (rev_y * t[-1], rev_x * t[-2])) - axs, ays, als = [], [], [] + axs, ays, als, aidxs = [], [], [], [] # Generate Pareto frontier - xs, ys, ls = [], [], [] + xs, ys, ls, idxs = [], [], [], [] last_x = xm["worst"] comparator = (lambda xv, lx: xv > lx) if last_x < 0 else (lambda xv, lx: xv < lx) - for algo_name, xv, yv in data: + for algo_name, index_name, xv, yv in data: if not xv or not yv: continue axs.append(xv) ays.append(yv) als.append(algo_name) + aidxs.append(algo_name) if comparator(xv, last_x): last_x = xv xs.append(xv) ys.append(yv) ls.append(algo_name) - return xs, ys, ls, axs, ays, als + idxs.append(index_name) + return xs, ys, ls, idxs, axs, ays, als, aidxs -def create_plot(all_data, raw, x_scale, y_scale, fn_out, linestyles): +def create_plot_search(all_data, raw, x_scale, y_scale, fn_out, linestyles, + dataset, k, batch_size): xn = "k-nn" yn = "qps" xm, ym = (metrics[xn], metrics[yn]) @@ -122,13 +127,13 @@ def create_plot(all_data, raw, x_scale, y_scale, fn_out, linestyles): # Sorting by mean y-value helps aligning plots with labels def mean_y(algo): - xs, ys, ls, axs, ays, als = create_pointset(all_data[algo], xn, yn) + xs, ys, ls, idxs, axs, ays, als, aidxs = create_pointset(all_data[algo], xn, yn) return -np.log(np.array(ys)).mean() # Find range for logit x-scale min_x, max_x = 1, 0 for algo in sorted(all_data.keys(), key=mean_y): - xs, ys, ls, axs, ays, als = create_pointset(all_data[algo], xn, yn) + xs, ys, ls, idxs, axs, ays, als, aidxs = create_pointset(all_data[algo], xn, yn) min_x = min([min_x] + [x for x in xs if x > 0]) max_x = max([max_x] + [x for x in xs if x < 1]) color, faded, linestyle, marker = linestyles[algo] @@ -169,7 +174,7 @@ def inv_fun(x): else: ax.set_xscale(x_scale) ax.set_yscale(y_scale) - ax.set_title(get_plot_label(xm, ym)) + ax.set_title(f"{dataset} k={k} batch_size={batch_size}") plt.gca().get_position() # plt.gca().set_position([box.x0, box.y0, box.width * 0.8, box.height]) ax.legend(handles, labels, loc="center left", bbox_to_anchor=(1, 0.5), prop={"size": 9}) @@ -188,37 +193,124 @@ def inv_fun(x): # 
Workaround for bug https://github.com/matplotlib/matplotlib/issues/6789 ax.spines["bottom"]._adjust_location() + print(f"writing search output to {fn_out}") plt.savefig(fn_out, bbox_inches="tight") plt.close() -def load_all_results(dataset_path): +def create_plot_build(build_results, search_results, linestyles, fn_out, + dataset, k, batch_size): + xn = "k-nn" + yn = "qps" + + recall_85 = [-1] * len(linestyles) + qps_85 = [-1] * len(linestyles) + bt_85 = [0] * len(linestyles) + i_85 = [-1] * len(linestyles) + recall_90 = [-1] * len(linestyles) + qps_90 = [-1] * len(linestyles) + bt_90 = [0] * len(linestyles) + i_90 = [-1] * len(linestyles) + recall_95 = [-1] * len(linestyles) + qps_95 = [-1] * len(linestyles) + bt_95 = [0] * len(linestyles) + i_95 = [-1] * len(linestyles) + data = OrderedDict() + colors = OrderedDict() + + # Sorting by mean y-value helps aligning plots with labels + def mean_y(algo): + xs, ys, ls, idxs, axs, ays, als, aidxs = create_pointset(search_results[algo], xn, yn) + return -np.log(np.array(ys)).mean() + + for pos, algo in enumerate(sorted(search_results.keys(), key=mean_y)): + xs, ys, ls, idxs, axs, ays, als, aidxs = create_pointset(search_results[algo], xn, yn) + # x is recall, y is qps, ls is algo_name, idxs is index_name + for i in range(len(xs)): + if xs[i] >= 0.85 and xs[i] < 0.9 and ys[i] > qps_85[pos]: + qps_85[pos] = ys[i] + bt_85[pos] = build_results[(ls[i], idxs[i])][0][2] + i_85[pos] = idxs[i] + elif xs[i] >= 0.9 and xs[i] < 0.95 and ys[i] > qps_90[pos]: + qps_90[pos] = ys[i] + bt_90[pos] = build_results[(ls[i], idxs[i])][0][2] + i_90[pos] = idxs[i] + elif xs[i] >= 0.95 and ys[i] > qps_95[pos]: + qps_95[pos] = ys[i] + bt_95[pos] = build_results[(ls[i], idxs[i])][0][2] + i_95[pos] = idxs[i] + data[algo] = [bt_85[pos], bt_90[pos], bt_95[pos]] + colors[algo] = linestyles[algo][0] + + index = ['@85% Recall', '@90% Recall', '@95% Recall'] + + df = pd.DataFrame(data, index=index) + plt.figure(figsize=(12, 9)) + ax = df.plot.bar(rot=0, color=colors) + fig = ax.get_figure() + print(f"writing search output to {fn_out}") + plt.title("Build Time for Highest QPS") + plt.suptitle(f"{dataset} k={k} batch_size={batch_size}") + plt.ylabel("Build Time (s)") + fig.savefig(fn_out) + + +def load_lines(results_path, result_files, method, index_key): results = dict() - results_path = os.path.join(dataset_path, "result", "search") - for result_filepath in os.listdir(results_path): - with open(os.path.join(results_path, result_filepath), 'r') as f: + + linebreaker = "name,iterations" + + for result_filename in result_files: + with open(os.path.join(results_path, result_filename), 'r') as f: lines = f.readlines() + lines = lines[:-1] if lines[-1] == "\n" else lines idx = 0 for pos, line in enumerate(lines): - if "QPS" in line: + if linebreaker in line: idx = pos break - keys = lines[idx].split(',') - recall_idx = -1 - qps_idx = -1 - for pos, key in enumerate(keys): - if "Recall" in key: - recall_idx = pos - if "QPS" in key: - qps_idx = pos + if method == "build": + if "hnswlib" in result_filename: + key_idx = [2] + else: + key_idx = [10] + elif method == "search": + if "hnswlib" in result_filename: + key_idx = [10, 6] + else: + key_idx = [12, 10] + for line in lines[idx+1:]: split_lines = line.split(',') algo_name = split_lines[0].split('.')[0].strip("\"") - if algo_name not in results: - results[algo_name] = [] - results[algo_name].append([algo_name, float(split_lines[recall_idx]), - float(split_lines[qps_idx])]) + index_name = split_lines[0].split('/')[0].strip("\"") + + 
if index_key == "algo": + dict_key = algo_name + elif index_key == "index": + dict_key = (algo_name, index_name) + if dict_key not in results: + results[dict_key] = [] + to_add = [algo_name, index_name] + for key_i in key_idx: + to_add.append(float(split_lines[key_i])) + results[dict_key].append(to_add) + + return results + + +def load_all_results(dataset_path, algorithms, k, batch_size, method, index_key): + results_path = os.path.join(dataset_path, "result", method) + result_files = os.listdir(results_path) + result_files = [result_filename for result_filename in result_files \ + if f"{k}-{batch_size}" in result_filename] + if len(algorithms) > 0: + result_files = [result_filename for result_filename in result_files if \ + result_filename.split('-')[0] in algorithms] + + results = load_lines(results_path, result_files, method, index_key) + return results @@ -233,6 +325,24 @@ def main(): parser.add_argument("--output-filepath", help="directory for PNG to be saved", default=os.getcwd()) + parser.add_argument("--algorithms", + help="plot only comma separated list of named \ + algorithms", + default=None) + parser.add_argument( + "-k", "--count", default=10, type=positive_int, help="the number of nearest neighbors to search for" + ) + parser.add_argument( + "-bs", "--batch-size", default=10000, type=positive_int, help="number of query vectors to use in each query trial" + ) + parser.add_argument( + "--build", + action="store_true" + ) + parser.add_argument( + "--search", + action="store_true" + ) parser.add_argument( "--x-scale", help="Scale to use when drawing the X-axis. \ @@ -248,15 +358,38 @@ def main(): parser.add_argument( "--raw", help="Show raw results (not just Pareto frontier) in faded colours", action="store_true" ) - args = parser.parse_args() - output_filepath = os.path.join(args.output_filepath, args.dataset + ".png") - print(f"writing output to {output_filepath}") - - results = load_all_results(os.path.join(args.dataset_path, args.dataset)) - linestyles = create_linestyles(sorted(results.keys())) + args = parser.parse_args() - create_plot(results, args.raw, args.x_scale, args.y_scale, output_filepath, linestyles) + if args.algorithms: + algorithms = args.algorithms.split(',') + else: + algorithms = [] + k = args.count + batch_size = args.batch_size + if not args.build and not args.search: + build = True + search = True + else: + build = args.build + search = args.search + + search_output_filepath = os.path.join(args.output_filepath, f"search-{args.dataset}-{k}-{batch_size}.png") + build_output_filepath = os.path.join(args.output_filepath, f"build-{args.dataset}-{k}-{batch_size}.png") + + search_results = load_all_results( + os.path.join(args.dataset_path, args.dataset), + algorithms, k, batch_size, "search", "algo") + linestyles = create_linestyles(sorted(search_results.keys())) + if search: + create_plot_search(search_results, args.raw, args.x_scale, args.y_scale, + search_output_filepath, linestyles, args.dataset, k, batch_size) + if build: + build_results = load_all_results( + os.path.join(args.dataset_path, args.dataset), + algorithms, k, batch_size, "build", "index") + create_plot_build(build_results, search_results, linestyles, build_output_filepath, + args.dataset, k, batch_size) if __name__ == "__main__": diff --git a/bench/ann/run.py b/bench/ann/run.py index 5c927d5066..5cac54506f 100644 --- a/bench/ann/run.py +++ b/bench/ann/run.py @@ -35,15 +35,15 @@ def validate_algorithm(algos_conf, algo): return algo in algos_conf_keys and not algos_conf[algo]["disabled"] -def 
find_executable(algos_conf, algo): +def find_executable(algos_conf, algo, k, batch_size): executable = algos_conf[algo]["executable"] conda_path = os.path.join(os.getenv("CONDA_PREFIX"), "bin", "ann", executable) build_path = os.path.join(os.getenv("RAFT_HOME"), "cpp", "build", executable) if os.path.exists(conda_path): - return (executable, conda_path, algo) + return (executable, conda_path, f"{algo}-{k}-{batch_size}") elif os.path.exists(build_path): - return (executable, build_path, algo) + return (executable, build_path, f"{algo}-{k}-{batch_size}") else: raise FileNotFoundError(executable) @@ -198,7 +198,7 @@ def main(): curr_algo = index["algo"] if index["name"] in indices and \ validate_algorithm(algos_conf, curr_algo): - executable_path = find_executable(algos_conf, curr_algo) + executable_path = find_executable(algos_conf, curr_algo, k, batch_size) if executable_path not in executables_to_run: executables_to_run[executable_path] = {"index": []} executables_to_run[executable_path]["index"].append(index) @@ -212,7 +212,7 @@ def main(): curr_algo = index["algo"] if curr_algo in algorithms and \ validate_algorithm(algos_conf, curr_algo): - executable_path = find_executable(algos_conf, curr_algo) + executable_path = find_executable(algos_conf, curr_algo, k, batch_size) if executable_path not in executables_to_run: executables_to_run[executable_path] = {"index": []} executables_to_run[executable_path]["index"].append(index) @@ -222,7 +222,7 @@ def main(): for index in conf_file["index"]: curr_algo = index["algo"] if validate_algorithm(algos_conf, curr_algo): - executable_path = find_executable(algos_conf, curr_algo) + executable_path = find_executable(algos_conf, curr_algo, k, batch_size) if executable_path not in executables_to_run: executables_to_run[executable_path] = {"index": []} executables_to_run[executable_path]["index"].append(index) From e92827a31ade30fcdd14b5be90764ae982ea1b0b Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Mon, 28 Aug 2023 18:47:31 -0500 Subject: [PATCH 42/70] FIX flag in the wrong conditional in build.sh --- build.sh | 5 ++--- .../{build_raft_nn_bench_cpu.sh => build.sh} | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) rename conda/recipes/raft-ann-bench-cpu/{build_raft_nn_bench_cpu.sh => build.sh} (86%) diff --git a/build.sh b/build.sh index 3ae6c338db..c3da5686f3 100755 --- a/build.sh +++ b/build.sh @@ -346,9 +346,9 @@ if hasArg bench-ann || (( ${NUMARGS} == 0 )); then CMAKE_TARGET="${CMAKE_TARGET};${ANN_BENCH_TARGETS}" if hasArg cpu-only; then COMPILE_LIBRARY=OFF + CPU_ONLY=ON else COMPILE_LIBRARY=ON - CPU_ONLY=ON fi fi @@ -498,8 +498,7 @@ fi # Build and (optionally) install the raft-ann-bench Python package if (( ${NUMARGS} == 0 )) || hasArg raft-dask; then - SKBUILD_CONFIGURE_OPTIONS="${SKBUILD_EXTRA_CMAKE_ARGS}" \ - python -m pip install --no-build-isolation --no-deps ${REPODIR}/python/raft-ann-bench + python -m pip install --no-build-isolation --no-deps ${REPODIR}/python/raft-ann-bench fi if hasArg docs; then diff --git a/conda/recipes/raft-ann-bench-cpu/build_raft_nn_bench_cpu.sh b/conda/recipes/raft-ann-bench-cpu/build.sh similarity index 86% rename from conda/recipes/raft-ann-bench-cpu/build_raft_nn_bench_cpu.sh rename to conda/recipes/raft-ann-bench-cpu/build.sh index b796b4d7d5..4462d5124b 100644 --- a/conda/recipes/raft-ann-bench-cpu/build_raft_nn_bench_cpu.sh +++ b/conda/recipes/raft-ann-bench-cpu/build.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash # Copyright (c) 2023, NVIDIA CORPORATION. 
-./build.sh bench-ann --cpu-only --no-nvtx --build-metrics=bench_ann --incl-cache-stats +./build.sh bench-ann --cpu-only --no-nvtx --build-metrics=bench_ann_cpu --incl-cache-stats cmake --install cpp/build --component ann_bench From b9defb76a8ca3dd3d4a5ca47f4858f5aa6e76950 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Mon, 28 Aug 2023 20:32:01 -0500 Subject: [PATCH 43/70] FIX remove accidentally deleted file --- .../conda_build_config.yaml | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 conda/recipes/raft-ann-bench-cpu/conda_build_config.yaml diff --git a/conda/recipes/raft-ann-bench-cpu/conda_build_config.yaml b/conda/recipes/raft-ann-bench-cpu/conda_build_config.yaml new file mode 100644 index 0000000000..0bd424f85b --- /dev/null +++ b/conda/recipes/raft-ann-bench-cpu/conda_build_config.yaml @@ -0,0 +1,20 @@ +c_compiler_version: + - 11 + +cxx_compiler_version: + - 11 + +sysroot_version: + - "2.17" + +cmake_version: + - ">=3.26.4" + +glog_version: + - ">=0.6.0" + +h5py_version: + - ">=3.8.0" + +nlohmann_json_version: + - ">=3.11.2" From b9e9ea67e04b3883d977f7283b2baea2b88ced0b Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Mon, 28 Aug 2023 22:28:26 -0500 Subject: [PATCH 44/70] FIX build.sh flag that was deleted in a bad merge --- build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sh b/build.sh index c3da5686f3..aac0fcb080 100755 --- a/build.sh +++ b/build.sh @@ -18,7 +18,7 @@ ARGS=$* # scripts, and that this script resides in the repo dir! REPODIR=$(cd $(dirname $0); pwd) -VALIDARGS="clean libraft pylibraft raft-dask docs tests template bench-prims bench-ann clean --uninstall -v -g -n --compile-lib --allgpuarch --no-nvtx --show_depr_warn --incl-cache-stats --time -h" +VALIDARGS="clean libraft pylibraft raft-dask docs tests template bench-prims bench-ann clean --uninstall -v -g -n --compile-lib --allgpuarch --no-nvtx --cpu-only --show_depr_warn --incl-cache-stats --time -h" HELP="$0 [ ...] [ ...] [--cmake-args=\"\"] [--cache-tool=] [--limit-tests=] [--limit-bench-prims=] [--limit-bench-ann=] [--build-metrics=] where is: clean - remove all existing build artifacts and configuration (start over) From 913dec23a6db84769d091e0c70f74f95bab91097 Mon Sep 17 00:00:00 2001 From: achirkin Date: Tue, 29 Aug 2023 12:53:39 +0200 Subject: [PATCH 45/70] Move the 'dump_parameters' earlier in the benchmarks to have a higher chance of capturing them.
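For context, the intended ordering is roughly as follows (illustrative sketch only; it reuses the helpers already defined in benchmark.hpp and simplifies the force_overwrite handling):

    void bench_build(::benchmark::State& state, Configuration::Index index, bool force_overwrite)
    {
      // Record the build parameters up front so they are attached to the reported
      // counters even if the benchmark skips or fails further down.
      dump_parameters(state, index.build_param);
      if (file_exists(index.file) && !force_overwrite) {
        // Simplified: the real code can also reuse or overwrite an existing index file.
        state.SkipWithError("index file already exists");
        return;
      }
      // ... build the index, then report the "GPU Time" and "index_size" counters ...
    }

bench_search gets the same treatment: dump_parameters(state, sp_json) now runs before 'k' and 'n_queries' are read and before any early exit.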
--- cpp/bench/ann/src/common/benchmark.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/bench/ann/src/common/benchmark.hpp b/cpp/bench/ann/src/common/benchmark.hpp index 6a8c6b933e..5ce453a116 100644 --- a/cpp/bench/ann/src/common/benchmark.hpp +++ b/cpp/bench/ann/src/common/benchmark.hpp @@ -126,6 +126,7 @@ void bench_build(::benchmark::State& state, Configuration::Index index, bool force_overwrite) { + dump_parameters(state, index.build_param); if (file_exists(index.file)) { if (force_overwrite) { log_info("Overwriting file: %s", index.file.c_str()); @@ -163,7 +164,6 @@ void bench_build(::benchmark::State& state, } state.counters.insert( {{"GPU Time", gpu_timer.total_time() / state.iterations()}, {"index_size", index_size}}); - dump_parameters(state, index.build_param); if (state.skipped()) { return; } make_sure_parent_dir_exists(index.file); @@ -177,6 +177,7 @@ void bench_search(::benchmark::State& state, std::size_t search_param_ix) { const auto& sp_json = index.search_params[search_param_ix]; + dump_parameters(state, sp_json); // NB: `k` and `n_queries` are guaranteed to be populated in conf.cpp const std::uint32_t k = sp_json["k"]; @@ -258,7 +259,6 @@ void bench_search(::benchmark::State& state, state.counters.insert({{"GPU Time", gpu_timer.total_time() / state.iterations()}, {"GPU QPS", queries_processed / gpu_timer.total_time()}}); } - dump_parameters(state, sp_json); if (state.skipped()) { return; } // evaluate recall From 8861fc8eb05613e94f67812dd7cc8defe432a451 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Tue, 29 Aug 2023 08:56:00 -0400 Subject: [PATCH 46/70] Implementing some of the review feedback --- bench/ann/conf/bigann-100M.json | 4 - bench/ann/conf/deep-100M.json | 16 -- bench/ann/conf/deep-image-96-angular.json | 20 +-- .../ann/conf/fashion-mnist-784-euclidean.json | 15 -- .../bench_ann_cuda-118_arch-x86_64.yaml | 1 + cpp/bench/ann/src/common/conf.cpp | 152 ------------------ cpp/bench/ann/src/common/conf.h | 76 --------- cpp/bench/ann/src/common/conf.hpp | 3 +- dependencies.yaml | 1 + 9 files changed, 8 insertions(+), 280 deletions(-) delete mode 100644 cpp/bench/ann/src/common/conf.cpp delete mode 100644 cpp/bench/ann/src/common/conf.h diff --git a/bench/ann/conf/bigann-100M.json b/bench/ann/conf/bigann-100M.json index e7d8661125..bc4ae40ff8 100644 --- a/bench/ann/conf/bigann-100M.json +++ b/bench/ann/conf/bigann-100M.json @@ -19,7 +19,6 @@ "algo": "raft_ivf_pq", "build_param": {"niter": 25, "nlist": 5000, "pq_dim": 64, "ratio": 10}, "file": "bigann-100M/raft_ivf_pq/dimpq64-cluster5K", - "dataset_memtype": "host", "search_params": [ { "nprobe": 20, "internalDistanceDtype": "float", "smemLutDtype": "float" }, { "nprobe": 30, "internalDistanceDtype": "float", "smemLutDtype": "float" }, @@ -155,7 +154,6 @@ "name": "raft_ivf_flat.nlist100K", "algo": "raft_ivf_flat", "build_param": {"nlist": 100000, "niter": 25, "ratio": 5}, - "dataset_memtype":"host", "file": "bigann-100M/raft_ivf_flat/nlist100K", "search_params": [ {"max_batch":10000, "max_k":10, "nprobe":20}, @@ -171,7 +169,6 @@ { "name": "raft_cagra.dim32", "algo": "raft_cagra", - "dataset_memtype": "host", "build_param": {"graph_degree": 32}, "file": "bigann-100M/raft_cagra/dim32", "search_params": [ @@ -183,7 +180,6 @@ { "name": "raft_cagra.dim64", "algo": "raft_cagra", - "dataset_memtype":"host", "build_param": {"graph_degree": 64}, "file": "bigann-100M/raft_cagra/dim64", "search_params": [ diff --git a/bench/ann/conf/deep-100M.json b/bench/ann/conf/deep-100M.json index 
f95df5e965..6bef94c070 100644 --- a/bench/ann/conf/deep-100M.json +++ b/bench/ann/conf/deep-100M.json @@ -219,7 +219,6 @@ { "name": "raft_ivf_flat.nlist100K", "algo": "raft_ivf_flat", - "dataset_memtype":"host", "build_param": {"nlist": 100000, "niter": 25, "ratio": 5}, "file": "deep-100M/raft_ivf_flat/nlist100K", "search_params": [ @@ -236,7 +235,6 @@ { "name": "raft_ivf_flat.nlist200K", "algo": "raft_ivf_flat", - "dataset_memtype":"host", "build_param": {"nlist": 200000, "niter": 25, "ratio": 5}, "file": "deep-100M/raft_ivf_flat/nlist200K", "search_params": [ @@ -254,7 +252,6 @@ { "name": "raft_ivf_pq.dimpq128-cluster1024", "algo": "raft_ivf_pq", - "dataset_memtype": "device", "build_param": {"nlist": 1024, "pq_dim": 128, "ratio": 1, "niter": 25 }, "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024", @@ -271,7 +268,6 @@ { "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", "algo": "raft_ivf_pq", - "dataset_memtype": "device", "build_param": { "nlist": 1024, "pq_dim": 128, @@ -294,7 +290,6 @@ { "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", "algo": "raft_ivf_pq", - "dataset_memtype": "device", "build_param": { "nlist": 1024, "pq_dim": 128, @@ -315,7 +310,6 @@ { "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8", "algo": "raft_ivf_pq", - "dataset_memtype": "device", "build_param": { "nlist": 1024, "pq_dim": 128, @@ -336,7 +330,6 @@ { "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", "algo": "raft_ivf_pq", - "dataset_memtype": "device", "build_param": { "nlist": 1024, "pq_dim": 64, @@ -357,7 +350,6 @@ { "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", "algo": "raft_ivf_pq", - "dataset_memtype": "device", "build_param": { "nlist": 1024, "pq_dim": 64, @@ -378,7 +370,6 @@ { "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", "algo": "raft_ivf_pq", - "dataset_memtype": "device", "build_param": { "nlist": 1024, "pq_dim": 32, @@ -399,7 +390,6 @@ { "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", "algo": "raft_ivf_pq", - "dataset_memtype": "device", "build_param": { "nlist": 1024, "pq_dim": 16, @@ -420,7 +410,6 @@ { "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", "algo": "raft_ivf_pq", - "dataset_memtype": "device", "build_param": { "nlist": 1024, "pq_dim": 128, @@ -441,7 +430,6 @@ { "name": "raft_ivf_pq.dimpq512-cluster1024-float-float", "algo": "raft_ivf_pq", - "dataset_memtype": "device", "build_param": { "nlist": 1024, "pq_dim": 512, @@ -462,7 +450,6 @@ { "name": "raft_cagra.dim32", "algo": "raft_cagra", - "dataset_memtype":"host", "build_param": {"graph_degree": 32, "intermediate_graph_degree": 48}, "file": "deep-100M/raft_cagra/dim32", "search_params": [ @@ -483,7 +470,6 @@ { "name": "raft_cagra.dim32.multi_cta", "algo": "raft_cagra", - "dataset_memtype":"host", "build_param": {"graph_degree": 32, "intermediate_graph_degree": 48}, "file": "deep-100M/raft_cagra/dim32", "search_params": [ @@ -504,7 +490,6 @@ { "name": "raft_cagra.dim32.multi_kernel", "algo": "raft_cagra", - "dataset_memtype":"host", "build_param": {"graph_degree": 32, "intermediate_graph_degree": 48}, "file": "deep-100M/raft_cagra/dim32", "search_params": [ @@ -525,7 +510,6 @@ { "name": "raft_cagra.dim64", "algo": "raft_cagra", - "dataset_memtype":"host", "build_param": {"graph_degree": 64}, "file": "deep-100M/raft_cagra/dim64", "search_params": [ diff --git a/bench/ann/conf/deep-image-96-angular.json b/bench/ann/conf/deep-image-96-angular.json index 72795c449d..0724d8b09f 100644 --- a/bench/ann/conf/deep-image-96-angular.json +++ b/bench/ann/conf/deep-image-96-angular.json 
@@ -94,7 +94,7 @@ { "name": "raft_bfknn", "algo": "raft_bfknn", - "dataset_memtype": "device", + "build_param": {}, "file": "index/deep-image-96-angular/raft_bfknn/bfknn", "search_params": [ @@ -702,7 +702,7 @@ { "name": "raft_ivf_pq.dimpq128-cluster1024", "algo": "raft_ivf_pq", - "dataset_memtype": "device", + "build_param": {"nlist": 1024, "pq_dim": 128, "ratio": 1, "niter": 25 }, "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024", @@ -719,7 +719,7 @@ { "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", "algo": "raft_ivf_pq", - "dataset_memtype": "device", + "build_param": { "nlist": 1024, "pq_dim": 128, @@ -742,7 +742,7 @@ { "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", "algo": "raft_ivf_pq", - "dataset_memtype": "device", + "build_param": { "nlist": 1024, "pq_dim": 128, @@ -763,7 +763,6 @@ { "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8", "algo": "raft_ivf_pq", - "dataset_memtype": "device", "build_param": { "nlist": 1024, "pq_dim": 128, @@ -784,7 +783,6 @@ { "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", "algo": "raft_ivf_pq", - "dataset_memtype": "device", "build_param": { "nlist": 1024, "pq_dim": 64, @@ -805,7 +803,6 @@ { "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", "algo": "raft_ivf_pq", - "dataset_memtype": "device", "build_param": { "nlist": 1024, "pq_dim": 64, @@ -826,7 +823,6 @@ { "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", "algo": "raft_ivf_pq", - "dataset_memtype": "device", "build_param": { "nlist": 1024, "pq_dim": 32, @@ -847,7 +843,7 @@ { "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", "algo": "raft_ivf_pq", - "dataset_memtype": "device", + "build_param": { "nlist": 1024, "pq_dim": 16, @@ -868,7 +864,6 @@ { "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", "algo": "raft_ivf_pq", - "dataset_memtype": "device", "build_param": { "nlist": 1024, "pq_dim": 128, @@ -889,7 +884,6 @@ { "name": "raft_ivf_pq.dimpq512-cluster1024-float-float", "algo": "raft_ivf_pq", - "dataset_memtype": "device", "build_param": { "nlist": 1024, "pq_dim": 512, @@ -910,7 +904,6 @@ { "name": "raft_ivf_flat.nlist1024", "algo": "raft_ivf_flat", - "dataset_memtype": "device", "build_param": { "nlist": 1024, "ratio": 1, @@ -948,7 +941,6 @@ { "name": "raft_ivf_flat.nlist16384", "algo": "raft_ivf_flat", - "dataset_memtype": "device", "build_param": { "nlist": 16384, "ratio": 2, @@ -990,7 +982,6 @@ { "name" : "raft_cagra.dim32", "algo" : "raft_cagra", - "dataset_memtype": "device", "build_param": { "graph_degree" : 32 }, @@ -1006,7 +997,6 @@ { "name" : "raft_cagra.dim64", "algo" : "raft_cagra", - "dataset_memtype": "device", "build_param": { "graph_degree" : 64 }, diff --git a/bench/ann/conf/fashion-mnist-784-euclidean.json b/bench/ann/conf/fashion-mnist-784-euclidean.json index 1a24eed018..d1b58be367 100644 --- a/bench/ann/conf/fashion-mnist-784-euclidean.json +++ b/bench/ann/conf/fashion-mnist-784-euclidean.json @@ -94,7 +94,6 @@ { "name": "raft_bfknn", "algo": "raft_bfknn", - "dataset_memtype": "device", "build_param": {}, "file": "index/fashion-mnist-784-euclidean/raft_bfknn/bfknn", "search_params": [ @@ -726,7 +725,6 @@ { "name": "raft_ivf_pq.dimpq128-cluster1024", "algo": "raft_ivf_pq", - "dataset_memtype": "device", "build_param": { "nlist": 1024, "pq_dim": 128, @@ -777,7 +775,6 @@ { "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", "algo": "raft_ivf_pq", - "dataset_memtype": "device", "build_param": { "nlist": 1024, "pq_dim": 128, @@ -846,7 +843,6 @@ { "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", "algo": 
"raft_ivf_pq", - "dataset_memtype": "device", "build_param": { "nlist": 1024, "pq_dim": 128, @@ -897,7 +893,6 @@ { "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8", "algo": "raft_ivf_pq", - "dataset_memtype": "device", "build_param": { "nlist": 1024, "pq_dim": 128, @@ -948,7 +943,6 @@ { "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", "algo": "raft_ivf_pq", - "dataset_memtype": "device", "build_param": { "nlist": 1024, "pq_dim": 64, @@ -999,7 +993,6 @@ { "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", "algo": "raft_ivf_pq", - "dataset_memtype": "device", "build_param": { "nlist": 1024, "pq_dim": 64, @@ -1050,7 +1043,6 @@ { "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", "algo": "raft_ivf_pq", - "dataset_memtype": "device", "build_param": { "nlist": 1024, "pq_dim": 32, @@ -1101,7 +1093,6 @@ { "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", "algo": "raft_ivf_pq", - "dataset_memtype": "device", "build_param": { "nlist": 1024, "pq_dim": 16, @@ -1152,7 +1143,6 @@ { "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", "algo": "raft_ivf_pq", - "dataset_memtype": "device", "build_param": { "nlist": 1024, "pq_dim": 128, @@ -1203,7 +1193,6 @@ { "name": "raft_ivf_pq.dimpq512-cluster1024-float-float", "algo": "raft_ivf_pq", - "dataset_memtype": "device", "build_param": { "nlist": 1024, "pq_dim": 512, @@ -1254,7 +1243,6 @@ { "name": "raft_ivf_flat.nlist1024", "algo": "raft_ivf_flat", - "dataset_memtype": "device", "build_param": { "nlist": 1024, "ratio": 1, @@ -1292,7 +1280,6 @@ { "name": "raft_ivf_flat.nlist16384", "algo": "raft_ivf_flat", - "dataset_memtype": "device", "build_param": { "nlist": 16384, "ratio": 2, @@ -1334,7 +1321,6 @@ { "name" : "raft_cagra.dim32", "algo" : "raft_cagra", - "dataset_memtype": "device", "build_param": { "graph_degree" : 32 }, @@ -1350,7 +1336,6 @@ { "name" : "raft_cagra.dim64", "algo" : "raft_cagra", - "dataset_memtype": "device", "build_param": { "graph_degree" : 64 }, diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index 37a4042aac..5eab55be13 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -35,6 +35,7 @@ dependencies: - nccl>=2.9.9 - ninja - nlohmann_json>=3.11.2 +- pandas - pyyaml - scikit-build>=0.13.1 - sysroot_linux-64==2.17 diff --git a/cpp/bench/ann/src/common/conf.cpp b/cpp/bench/ann/src/common/conf.cpp deleted file mode 100644 index 098313db45..0000000000 --- a/cpp/bench/ann/src/common/conf.cpp +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "conf.h" - -#include -#include -#include -#include -#include - -#include "util.hpp" - -namespace raft::bench::ann { -using std::runtime_error; -using std::string; -using std::unordered_set; -using std::vector; - -Configuration::Configuration(std::istream& conf_stream) -{ - // to enable comments in json - auto conf = nlohmann::json::parse(conf_stream, nullptr, true, true); - - parse_dataset_(conf.at("dataset")); - parse_index_(conf.at("index"), conf.at("search_basic_param")); -} - -vector Configuration::get_indices(const string& patterns) const -{ - vector names; - for (const auto& index : indices_) { - names.push_back(index.name); - } - - auto matched = match_(names, patterns); - if (matched.empty()) { throw runtime_error("no available index matches '" + patterns + "'"); } - - vector res; - for (const auto& index : indices_) { - if (matched.find(index.name) != matched.end()) { res.push_back(index); } - } - return res; -} - -void Configuration::parse_dataset_(const nlohmann::json& conf) -{ - dataset_conf_.name = conf.at("name"); - dataset_conf_.base_file = conf.at("base_file"); - dataset_conf_.query_file = conf.at("query_file"); - dataset_conf_.distance = conf.at("distance"); - - if (conf.contains("subset_first_row")) { - dataset_conf_.subset_first_row = conf.at("subset_first_row"); - } - if (conf.contains("subset_size")) { dataset_conf_.subset_size = conf.at("subset_size"); } - - if (conf.contains("dtype")) { - dataset_conf_.dtype = conf.at("dtype"); - } else { - auto filename = dataset_conf_.base_file; - if (!filename.compare(filename.size() - 4, 4, "fbin")) { - dataset_conf_.dtype = "float"; - } else if (!filename.compare(filename.size() - 5, 5, "u8bin")) { - dataset_conf_.dtype = "uint8"; - } else if (!filename.compare(filename.size() - 5, 5, "i8bin")) { - dataset_conf_.dtype = "int8"; - } else { - log_error("Could not determine data type of the dataset %s", filename.c_str()); - } - } -} - -void Configuration::parse_index_(const nlohmann::json& index_conf, - const nlohmann::json& search_basic_conf) -{ - const int batch_size = search_basic_conf.at("batch_size"); - const int k = search_basic_conf.at("k"); - const int run_count = search_basic_conf.at("run_count"); - - for (const auto& conf : index_conf) { - Index index; - index.name = conf.at("name"); - index.algo = conf.at("algo"); - index.build_param = conf.at("build_param"); - index.file = conf.at("file"); - index.batch_size = batch_size; - index.k = k; - index.run_count = run_count; - index.index_conf = index_conf; - - if (conf.contains("multigpu")) { - for (auto it : conf.at("multigpu")) { - index.dev_list.push_back(it); - } - if (index.dev_list.empty()) { throw std::runtime_error("dev_list shouln't be empty!"); } - index.dev_list.shrink_to_fit(); - index.build_param["multigpu"] = conf["multigpu"]; - } - - if (conf.contains("refine_ratio")) { - float refine_ratio = conf.at("refine_ratio"); - if (refine_ratio <= 1.0f) { - throw runtime_error("'" + index.name + "': refine_ratio should > 1.0"); - } - index.refine_ratio = refine_ratio; - } - - for (const auto& param : conf.at("search_params")) { - index.search_params.push_back(param); - } - index.search_result_file = conf.at("search_result_file"); - - indices_.push_back(index); - } -} - -unordered_set Configuration::match_(const vector& candidates, - const string& patterns) const -{ - unordered_set matched; - for (const auto& pat : split(patterns, ',')) { - if (pat.empty()) { continue; } - - if (pat.back() == '*') { - auto len = pat.size() - 1; - for (const auto& item : 
candidates) { - if (item.compare(0, len, pat, 0, len) == 0) { matched.insert(item); } - } - } else { - for (const auto& item : candidates) { - if (item == pat) { matched.insert(item); } - } - } - } - - return matched; -} - -} // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/common/conf.h b/cpp/bench/ann/src/common/conf.h deleted file mode 100644 index bf3a0cba64..0000000000 --- a/cpp/bench/ann/src/common/conf.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once -#include -#include -#include -#include - -#define JSON_DIAGNOSTICS 1 -#include - -namespace raft::bench::ann { - -class Configuration { - public: - struct Index { - std::string name; - std::string algo; - nlohmann::json build_param; - nlohmann::json index_conf; - std::string file; - std::vector dev_list; - - int batch_size; - int k; - int run_count; - std::vector search_params; - std::string search_result_file; - float refine_ratio{0.0f}; - }; - - struct DatasetConf { - std::string name; - std::string base_file; - // use only a subset of base_file, - // the range of rows is [subset_first_row, subset_first_row + subset_size) - // however, subset_size = 0 means using all rows after subset_first_row - // that is, the subset is [subset_first_row, #rows in base_file) - size_t subset_first_row{0}; - size_t subset_size{0}; - std::string query_file; - std::string distance; - - // data type of input dataset, possible values ["float", "int8", "uint8"] - std::string dtype; - }; - - Configuration(std::istream& conf_stream); - - DatasetConf get_dataset_conf() const { return dataset_conf_; } - std::vector get_indices(const std::string& patterns) const; - - private: - void parse_dataset_(const nlohmann::json& conf); - void parse_index_(const nlohmann::json& index_conf, const nlohmann::json& search_basic_conf); - std::unordered_set match_(const std::vector& candidates, - const std::string& patterns) const; - - DatasetConf dataset_conf_; - std::vector indices_; -}; - -} // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/common/conf.hpp b/cpp/bench/ann/src/common/conf.hpp index b22986814d..cc1883b56b 100644 --- a/cpp/bench/ann/src/common/conf.hpp +++ b/cpp/bench/ann/src/common/conf.hpp @@ -92,7 +92,7 @@ class Configuration { dataset_conf_.dtype = conf.at("dtype"); } else { auto filename = dataset_conf_.base_file; - if (!filename.compare(filename.size() - 4, 4, "fbin")) { + if (!filename.compare(filename.size() - 4, 24, "fbin")) { dataset_conf_.dtype = "float"; } else if (!filename.compare(filename.size() - 5, 5, "u8bin")) { dataset_conf_.dtype = "uint8"; @@ -117,7 +117,6 @@ class Configuration { index.file = conf.at("file"); index.batch_size = batch_size; index.k = k; - index.index_conf = conf; if (conf.contains("multigpu")) { for (auto it : conf.at("multigpu")) { diff --git a/dependencies.yaml b/dependencies.yaml index 9a0807143c..64a6669f92 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -172,6 +172,7 @@ 
dependencies: - faiss-proc=*=cuda - matplotlib - pyyaml + - pandas cudatoolkit: specific: From 2f52b02e01a2da1bb3db8763c1d702484086d54b Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Tue, 29 Aug 2023 08:58:24 -0400 Subject: [PATCH 47/70] Bench ann --- bench/ann/run.py | 1 + cpp/bench/ann/src/common/conf.hpp | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/bench/ann/run.py b/bench/ann/run.py index 5cac54506f..c00ced8b3b 100644 --- a/bench/ann/run.py +++ b/bench/ann/run.py @@ -90,6 +90,7 @@ def run_build_and_search(conf_file, conf_filename, conf_filedir, "--benchmark_counters_tabular", "--override_kv=k:%s" % k, "--override_kv=n_queries:%s" % batch_size, + "--benchmark_min_warmup_time=0.01", "--benchmark_out_format=csv", f"--benchmark_out={os.path.join(search_folder, f'{algo}.csv')}"] if force: diff --git a/cpp/bench/ann/src/common/conf.hpp b/cpp/bench/ann/src/common/conf.hpp index cc1883b56b..2a651016dd 100644 --- a/cpp/bench/ann/src/common/conf.hpp +++ b/cpp/bench/ann/src/common/conf.hpp @@ -36,7 +36,6 @@ class Configuration { nlohmann::json build_param; std::string file; std::vector dev_list; - nlohmann::json index_conf; int batch_size; int k; From 521b6969fbc539f59ffb3990e108ba8d239dbe2d Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Tue, 29 Aug 2023 10:41:16 -0400 Subject: [PATCH 48/70] Fixing a couple potential merge artifacts --- cpp/bench/ann/src/common/benchmark.cpp | 5 ++--- cpp/bench/ann/src/common/conf.hpp | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/cpp/bench/ann/src/common/benchmark.cpp b/cpp/bench/ann/src/common/benchmark.cpp index 555f14f1bb..283cd2d297 100644 --- a/cpp/bench/ann/src/common/benchmark.cpp +++ b/cpp/bench/ann/src/common/benchmark.cpp @@ -81,8 +81,7 @@ auto create_algo(const std::string& algo, const std::string& distance, int dim, const nlohmann::json& conf, - const std::vector& dev_list, - const nlohman::json& index_conf) -> std::unique_ptr> + const std::vector& dev_list) -> std::unique_ptr> { static auto fname = get_fun_name(reinterpret_cast(&create_algo)); auto handle = load_lib(algo); @@ -91,7 +90,7 @@ auto create_algo(const std::string& algo, throw std::runtime_error("Couldn't load the create_algo function (" + algo + ")"); } auto fun = reinterpret_cast)>(fun_addr); - return fun(algo, distance, dim, conf, dev_list, index_conf); + return fun(algo, distance, dim, conf, dev_list); } template diff --git a/cpp/bench/ann/src/common/conf.hpp b/cpp/bench/ann/src/common/conf.hpp index 2a651016dd..405b00a74e 100644 --- a/cpp/bench/ann/src/common/conf.hpp +++ b/cpp/bench/ann/src/common/conf.hpp @@ -91,7 +91,7 @@ class Configuration { dataset_conf_.dtype = conf.at("dtype"); } else { auto filename = dataset_conf_.base_file; - if (!filename.compare(filename.size() - 4, 24, "fbin")) { + if (!filename.compare(filename.size() - 4, 4, "fbin")) { dataset_conf_.dtype = "float"; } else if (!filename.compare(filename.size() - 5, 5, "u8bin")) { dataset_conf_.dtype = "uint8"; From 94296ca11448a7d5adeb94e5c0a266f2b337f7f0 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Tue, 29 Aug 2023 09:48:11 -0500 Subject: [PATCH 49/70] FIX multiple fixes --- build.sh | 4 ++-- cpp/CMakeLists.txt | 6 ++++-- cpp/bench/ann/CMakeLists.txt | 16 ++++++++-------- python/raft-ann-bench/pyproject.toml | 7 +++++-- .../{ => src}/raft-ann-bench/__init__.py | 0 .../raft-ann-bench/get_dataset/__main__.py | 3 ++- .../raft-ann-bench/get_dataset/fbin_to_f16bin.py | 0 .../raft-ann-bench/get_dataset/hdf5_to_fbin.py | 0 .../{ => 
src}/raft-ann-bench/plot/__main__.py | 0 .../{ => src}/raft-ann-bench/run/__main__.py | 0 .../{ => src}/raft-ann-bench/run/algos-cpu.yaml | 0 .../{ => src}/raft-ann-bench/run/algos.yaml | 0 .../raft-ann-bench/run/conf/bigann-100M.json | 0 .../raft-ann-bench/run/conf/deep-100M.json | 0 .../raft-ann-bench/run/conf/deep-1B.json | 0 .../run/conf/deep-image-96-angular.json | 0 .../run/conf/fashion-mnist-784-euclidean.json | 0 .../run/conf/gist-960-euclidean.json | 0 .../run/conf/glove-100-angular.json | 0 .../raft-ann-bench/run/conf/glove-100-inner.json | 0 .../run/conf/glove-50-angular.json | 0 .../run/conf/lastfm-65-angular.json | 0 .../run/conf/mnist-784-euclidean.json | 0 .../run/conf/nytimes-256-angular.json | 0 .../run/conf/sift-128-euclidean.json | 0 .../raft-ann-bench/split_groundtruth/__main__.py | 0 .../split_groundtruth/split_groundtruth.pl | 0 27 files changed, 21 insertions(+), 15 deletions(-) rename python/raft-ann-bench/{ => src}/raft-ann-bench/__init__.py (100%) rename python/raft-ann-bench/{ => src}/raft-ann-bench/get_dataset/__main__.py (95%) rename python/raft-ann-bench/{ => src}/raft-ann-bench/get_dataset/fbin_to_f16bin.py (100%) rename python/raft-ann-bench/{ => src}/raft-ann-bench/get_dataset/hdf5_to_fbin.py (100%) rename python/raft-ann-bench/{ => src}/raft-ann-bench/plot/__main__.py (100%) rename python/raft-ann-bench/{ => src}/raft-ann-bench/run/__main__.py (100%) rename python/raft-ann-bench/{ => src}/raft-ann-bench/run/algos-cpu.yaml (100%) rename python/raft-ann-bench/{ => src}/raft-ann-bench/run/algos.yaml (100%) rename python/raft-ann-bench/{ => src}/raft-ann-bench/run/conf/bigann-100M.json (100%) rename python/raft-ann-bench/{ => src}/raft-ann-bench/run/conf/deep-100M.json (100%) rename python/raft-ann-bench/{ => src}/raft-ann-bench/run/conf/deep-1B.json (100%) rename python/raft-ann-bench/{ => src}/raft-ann-bench/run/conf/deep-image-96-angular.json (100%) rename python/raft-ann-bench/{ => src}/raft-ann-bench/run/conf/fashion-mnist-784-euclidean.json (100%) rename python/raft-ann-bench/{ => src}/raft-ann-bench/run/conf/gist-960-euclidean.json (100%) rename python/raft-ann-bench/{ => src}/raft-ann-bench/run/conf/glove-100-angular.json (100%) rename python/raft-ann-bench/{ => src}/raft-ann-bench/run/conf/glove-100-inner.json (100%) rename python/raft-ann-bench/{ => src}/raft-ann-bench/run/conf/glove-50-angular.json (100%) rename python/raft-ann-bench/{ => src}/raft-ann-bench/run/conf/lastfm-65-angular.json (100%) rename python/raft-ann-bench/{ => src}/raft-ann-bench/run/conf/mnist-784-euclidean.json (100%) rename python/raft-ann-bench/{ => src}/raft-ann-bench/run/conf/nytimes-256-angular.json (100%) rename python/raft-ann-bench/{ => src}/raft-ann-bench/run/conf/sift-128-euclidean.json (100%) rename python/raft-ann-bench/{ => src}/raft-ann-bench/split_groundtruth/__main__.py (100%) rename python/raft-ann-bench/{ => src}/raft-ann-bench/split_groundtruth/split_groundtruth.pl (100%) diff --git a/build.sh b/build.sh index aac0fcb080..e54d37f4a1 100755 --- a/build.sh +++ b/build.sh @@ -344,7 +344,7 @@ fi if hasArg bench-ann || (( ${NUMARGS} == 0 )); then BUILD_ANN_BENCH=ON CMAKE_TARGET="${CMAKE_TARGET};${ANN_BENCH_TARGETS}" - if hasArg cpu-only; then + if hasArg --cpu-only; then COMPILE_LIBRARY=OFF CPU_ONLY=ON else @@ -497,7 +497,7 @@ if (( ${NUMARGS} == 0 )) || hasArg raft-dask; then fi # Build and (optionally) install the raft-ann-bench Python package -if (( ${NUMARGS} == 0 )) || hasArg raft-dask; then +if (( ${NUMARGS} == 0 )) || hasArg bench-ann; then python -m pip 
install --no-build-isolation --no-deps ${REPODIR}/python/raft-ann-bench fi diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 18fd1ab548..1484705996 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -139,8 +139,10 @@ if(CUDA_STATIC_RUNTIME) set(_ctk_static_suffix "_static") endif() -# CUDA runtime -rapids_cuda_init_runtime(USE_STATIC ${CUDA_STATIC_RUNTIME}) +if(NOT CPU_ONLY) + # CUDA runtime + rapids_cuda_init_runtime(USE_STATIC ${CUDA_STATIC_RUNTIME}) +endif() if(NOT DISABLE_OPENMP) find_package(OpenMP) diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index 2df845636e..c76133c6f2 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -40,14 +40,14 @@ if(CPU_ONLY) set(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ OFF) set(RAFT_ANN_BENCH_USE_RAFT_CAGRA OFF) set(RAFT_ANN_BENCH_USE_GGNN OFF) -endif() - -# Disable faiss benchmarks on CUDA 12 since faiss is not yet CUDA 12-enabled. -# https://github.com/rapidsai/raft/issues/1627 -if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.0.0) - set(RAFT_ANN_BENCH_USE_FAISS_BFKNN OFF) - set(RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT OFF) - set(RAFT_ANN_BENCH_USE_FAISS_IVF_PQ OFF) +else() + # Disable faiss benchmarks on CUDA 12 since faiss is not yet CUDA 12-enabled. + # https://github.com/rapidsai/raft/issues/1627 + if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.0.0) + set(RAFT_ANN_BENCH_USE_FAISS_BFKNN OFF) + set(RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT OFF) + set(RAFT_ANN_BENCH_USE_FAISS_IVF_PQ OFF) + endif() endif() set(RAFT_ANN_BENCH_USE_FAISS OFF) diff --git a/python/raft-ann-bench/pyproject.toml b/python/raft-ann-bench/pyproject.toml index 4c8cc94288..7decc8858b 100644 --- a/python/raft-ann-bench/pyproject.toml +++ b/python/raft-ann-bench/pyproject.toml @@ -32,8 +32,11 @@ dynamic = ["entry-points"] [project.urls] Homepage = "https://github.com/rapidsai/raft" -[tool.setuptools] -license-files = ["LICENSE"] +[tool.setuptools.packages.find] +where = ["src"] + +[tool.setuptools.package-data] +"*" = ["*.*"] [tool.isort] line_length = 79 diff --git a/python/raft-ann-bench/raft-ann-bench/__init__.py b/python/raft-ann-bench/src/raft-ann-bench/__init__.py similarity index 100% rename from python/raft-ann-bench/raft-ann-bench/__init__.py rename to python/raft-ann-bench/src/raft-ann-bench/__init__.py diff --git a/python/raft-ann-bench/raft-ann-bench/get_dataset/__main__.py b/python/raft-ann-bench/src/raft-ann-bench/get_dataset/__main__.py similarity index 95% rename from python/raft-ann-bench/raft-ann-bench/get_dataset/__main__.py rename to python/raft-ann-bench/src/raft-ann-bench/get_dataset/__main__.py index 605146a84e..1906bd2247 100644 --- a/python/raft-ann-bench/raft-ann-bench/get_dataset/__main__.py +++ b/python/raft-ann-bench/src/raft-ann-bench/get_dataset/__main__.py @@ -32,7 +32,8 @@ def download_dataset(url, path): def convert_hdf5_to_fbin(path, normalize): - ann_bench_scripts_path = "hdf5_to_fbin.py" + scripts_path = os.path.dirname(os.path.realpath(__file__)) + ann_bench_scripts_path = os.path.join(scripts_path, "hdf5_to_fbin.py") if normalize and "angular" in path: p = subprocess.Popen( ["python", ann_bench_scripts_path, "-n", "%s" % path] diff --git a/python/raft-ann-bench/raft-ann-bench/get_dataset/fbin_to_f16bin.py b/python/raft-ann-bench/src/raft-ann-bench/get_dataset/fbin_to_f16bin.py similarity index 100% rename from python/raft-ann-bench/raft-ann-bench/get_dataset/fbin_to_f16bin.py rename to 
python/raft-ann-bench/src/raft-ann-bench/get_dataset/fbin_to_f16bin.py diff --git a/python/raft-ann-bench/raft-ann-bench/get_dataset/hdf5_to_fbin.py b/python/raft-ann-bench/src/raft-ann-bench/get_dataset/hdf5_to_fbin.py similarity index 100% rename from python/raft-ann-bench/raft-ann-bench/get_dataset/hdf5_to_fbin.py rename to python/raft-ann-bench/src/raft-ann-bench/get_dataset/hdf5_to_fbin.py diff --git a/python/raft-ann-bench/raft-ann-bench/plot/__main__.py b/python/raft-ann-bench/src/raft-ann-bench/plot/__main__.py similarity index 100% rename from python/raft-ann-bench/raft-ann-bench/plot/__main__.py rename to python/raft-ann-bench/src/raft-ann-bench/plot/__main__.py diff --git a/python/raft-ann-bench/raft-ann-bench/run/__main__.py b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py similarity index 100% rename from python/raft-ann-bench/raft-ann-bench/run/__main__.py rename to python/raft-ann-bench/src/raft-ann-bench/run/__main__.py diff --git a/python/raft-ann-bench/raft-ann-bench/run/algos-cpu.yaml b/python/raft-ann-bench/src/raft-ann-bench/run/algos-cpu.yaml similarity index 100% rename from python/raft-ann-bench/raft-ann-bench/run/algos-cpu.yaml rename to python/raft-ann-bench/src/raft-ann-bench/run/algos-cpu.yaml diff --git a/python/raft-ann-bench/raft-ann-bench/run/algos.yaml b/python/raft-ann-bench/src/raft-ann-bench/run/algos.yaml similarity index 100% rename from python/raft-ann-bench/raft-ann-bench/run/algos.yaml rename to python/raft-ann-bench/src/raft-ann-bench/run/algos.yaml diff --git a/python/raft-ann-bench/raft-ann-bench/run/conf/bigann-100M.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/bigann-100M.json similarity index 100% rename from python/raft-ann-bench/raft-ann-bench/run/conf/bigann-100M.json rename to python/raft-ann-bench/src/raft-ann-bench/run/conf/bigann-100M.json diff --git a/python/raft-ann-bench/raft-ann-bench/run/conf/deep-100M.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-100M.json similarity index 100% rename from python/raft-ann-bench/raft-ann-bench/run/conf/deep-100M.json rename to python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-100M.json diff --git a/python/raft-ann-bench/raft-ann-bench/run/conf/deep-1B.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-1B.json similarity index 100% rename from python/raft-ann-bench/raft-ann-bench/run/conf/deep-1B.json rename to python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-1B.json diff --git a/python/raft-ann-bench/raft-ann-bench/run/conf/deep-image-96-angular.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-image-96-angular.json similarity index 100% rename from python/raft-ann-bench/raft-ann-bench/run/conf/deep-image-96-angular.json rename to python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-image-96-angular.json diff --git a/python/raft-ann-bench/raft-ann-bench/run/conf/fashion-mnist-784-euclidean.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/fashion-mnist-784-euclidean.json similarity index 100% rename from python/raft-ann-bench/raft-ann-bench/run/conf/fashion-mnist-784-euclidean.json rename to python/raft-ann-bench/src/raft-ann-bench/run/conf/fashion-mnist-784-euclidean.json diff --git a/python/raft-ann-bench/raft-ann-bench/run/conf/gist-960-euclidean.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/gist-960-euclidean.json similarity index 100% rename from python/raft-ann-bench/raft-ann-bench/run/conf/gist-960-euclidean.json rename to python/raft-ann-bench/src/raft-ann-bench/run/conf/gist-960-euclidean.json diff 
--git a/python/raft-ann-bench/raft-ann-bench/run/conf/glove-100-angular.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-angular.json similarity index 100% rename from python/raft-ann-bench/raft-ann-bench/run/conf/glove-100-angular.json rename to python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-angular.json diff --git a/python/raft-ann-bench/raft-ann-bench/run/conf/glove-100-inner.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-inner.json similarity index 100% rename from python/raft-ann-bench/raft-ann-bench/run/conf/glove-100-inner.json rename to python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-inner.json diff --git a/python/raft-ann-bench/raft-ann-bench/run/conf/glove-50-angular.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-50-angular.json similarity index 100% rename from python/raft-ann-bench/raft-ann-bench/run/conf/glove-50-angular.json rename to python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-50-angular.json diff --git a/python/raft-ann-bench/raft-ann-bench/run/conf/lastfm-65-angular.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/lastfm-65-angular.json similarity index 100% rename from python/raft-ann-bench/raft-ann-bench/run/conf/lastfm-65-angular.json rename to python/raft-ann-bench/src/raft-ann-bench/run/conf/lastfm-65-angular.json diff --git a/python/raft-ann-bench/raft-ann-bench/run/conf/mnist-784-euclidean.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/mnist-784-euclidean.json similarity index 100% rename from python/raft-ann-bench/raft-ann-bench/run/conf/mnist-784-euclidean.json rename to python/raft-ann-bench/src/raft-ann-bench/run/conf/mnist-784-euclidean.json diff --git a/python/raft-ann-bench/raft-ann-bench/run/conf/nytimes-256-angular.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/nytimes-256-angular.json similarity index 100% rename from python/raft-ann-bench/raft-ann-bench/run/conf/nytimes-256-angular.json rename to python/raft-ann-bench/src/raft-ann-bench/run/conf/nytimes-256-angular.json diff --git a/python/raft-ann-bench/raft-ann-bench/run/conf/sift-128-euclidean.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/sift-128-euclidean.json similarity index 100% rename from python/raft-ann-bench/raft-ann-bench/run/conf/sift-128-euclidean.json rename to python/raft-ann-bench/src/raft-ann-bench/run/conf/sift-128-euclidean.json diff --git a/python/raft-ann-bench/raft-ann-bench/split_groundtruth/__main__.py b/python/raft-ann-bench/src/raft-ann-bench/split_groundtruth/__main__.py similarity index 100% rename from python/raft-ann-bench/raft-ann-bench/split_groundtruth/__main__.py rename to python/raft-ann-bench/src/raft-ann-bench/split_groundtruth/__main__.py diff --git a/python/raft-ann-bench/raft-ann-bench/split_groundtruth/split_groundtruth.pl b/python/raft-ann-bench/src/raft-ann-bench/split_groundtruth/split_groundtruth.pl similarity index 100% rename from python/raft-ann-bench/raft-ann-bench/split_groundtruth/split_groundtruth.pl rename to python/raft-ann-bench/src/raft-ann-bench/split_groundtruth/split_groundtruth.pl From 0a356085ac07542bb2dd20f996ab3bf93c141cc8 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Wed, 30 Aug 2023 10:50:03 -0500 Subject: [PATCH 50/70] FIX multiple fixes --- .../src/raft-ann-bench/get_dataset/__main__.py | 1 + python/raft-ann-bench/src/raft-ann-bench/run/__main__.py | 6 +++--- .../run/conf/fashion-mnist-784-euclidean.json | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git 
a/python/raft-ann-bench/src/raft-ann-bench/get_dataset/__main__.py b/python/raft-ann-bench/src/raft-ann-bench/get_dataset/__main__.py index 1906bd2247..d2cb8ebe98 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/get_dataset/__main__.py +++ b/python/raft-ann-bench/src/raft-ann-bench/get_dataset/__main__.py @@ -34,6 +34,7 @@ def download_dataset(url, path): def convert_hdf5_to_fbin(path, normalize): scripts_path = os.path.dirname(os.path.realpath(__file__)) ann_bench_scripts_path = os.path.join(scripts_path, "hdf5_to_fbin.py") + print(f"calling script {ann_bench_scripts_path}") if normalize and "angular" in path: p = subprocess.Popen( ["python", ann_bench_scripts_path, "-n", "%s" % path] diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py index 100bd26004..b077c87cc3 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py +++ b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py @@ -47,10 +47,10 @@ def find_executable(algos_conf, algo, k, batch_size): build_path = os.path.join( os.getenv("RAFT_HOME"), "cpp", "build", executable ) - if os.path.exists(conda_path): + if conda_path is not None and os.path.exists(conda_path): print("-- Using RAFT bench found in conda environment: ") return (executable, conda_path, f"{algo}-{k}-{batch_size}") - elif os.path.exists(build_path): + elif build_path is not None and os.path.exists(build_path): print(f"-- Using RAFT bench from repository specified in {build_path}: ") return (executable, build_path, f"{algo}-{k}-{batch_size}") else: @@ -120,7 +120,7 @@ def main(): algo_file = "algos.yaml" except ImportError: - algo_file = "algos_cpu.yaml" + algo_file = "algos-cpu.yaml" with open(f"{scripts_path}/{algo_file}", "r") as f: algos_conf = yaml.safe_load(f) diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/fashion-mnist-784-euclidean.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/fashion-mnist-784-euclidean.json index 1a24eed018..e15102e9d8 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/fashion-mnist-784-euclidean.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/fashion-mnist-784-euclidean.json @@ -1,8 +1,8 @@ { "dataset": { "name": "fashion-mnist-784-euclidean", - "base_file": "data/fashion-mnist-784-euclidean/base.fbin", - "query_file": "data/fashion-mnist-784-euclidean/query.fbin", + "base_file": "fashion-mnist-784-euclidean/base.fbin", + "query_file": "fashion-mnist-784-euclidean/query.fbin", "distance": "euclidean" }, "search_basic_param": { From 78356aaf8398bf9abca4e675f3400aacb2820841 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 30 Aug 2023 19:48:03 -0400 Subject: [PATCH 51/70] Merging python (will need some more fixes later but this will work for now) --- bench/ann/plot.py | 385 ------------------ bench/ann/run.py | 243 ----------- .../raft-ann-bench/data_export/__main__.py | 54 ++- .../raft-ann-bench/plot/__main__.py | 238 +++++++---- .../raft-ann-bench/run/__main__.py | 9 +- 5 files changed, 189 insertions(+), 740 deletions(-) delete mode 100644 bench/ann/plot.py delete mode 100644 bench/ann/run.py rename bench/ann/data_export.py => python/raft-ann-bench/raft-ann-bench/data_export/__main__.py (57%) diff --git a/bench/ann/plot.py b/bench/ann/plot.py deleted file mode 100644 index ff7cb29b4a..0000000000 --- a/bench/ann/plot.py +++ /dev/null @@ -1,385 +0,0 @@ -# -# Copyright (c) 2023, NVIDIA CORPORATION. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# This script is inspired by -# 1: https://github.com/erikbern/ann-benchmarks/blob/main/plot.py -# 2: https://github.com/erikbern/ann-benchmarks/blob/main/ann_benchmarks/plotting/utils.py -# 3: https://github.com/erikbern/ann-benchmarks/blob/main/ann_benchmarks/plotting/metrics.py -# Licence: https://github.com/erikbern/ann-benchmarks/blob/main/LICENSE - -import matplotlib as mpl - -mpl.use("Agg") # noqa -import argparse -from collections import OrderedDict -import itertools -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -import os - - - -metrics = { - "k-nn": { - "description": "Recall", - "worst": float("-inf"), - "lim": [0.0, 1.03], - }, - "qps": { - "description": "Queries per second (1/s)", - "worst": float("-inf"), - } -} - -def positive_int(input_str: str) -> int: - try: - i = int(input_str) - if i < 1: - raise ValueError - except ValueError: - raise argparse.ArgumentTypeError(f"{input_str} is not a positive integer") - - return i - - -def generate_n_colors(n): - vs = np.linspace(0.3, 0.9, 7) - colors = [(0.9, 0.4, 0.4, 1.0)] - - def euclidean(a, b): - return sum((x - y) ** 2 for x, y in zip(a, b)) - - while len(colors) < n: - new_color = max(itertools.product(vs, vs, vs), key=lambda a: min(euclidean(a, b) for b in colors)) - colors.append(new_color + (1.0,)) - return colors - - -def create_linestyles(unique_algorithms): - colors = dict(zip(unique_algorithms, generate_n_colors(len(unique_algorithms)))) - linestyles = dict((algo, ["--", "-.", "-", ":"][i % 4]) for i, algo in enumerate(unique_algorithms)) - markerstyles = dict((algo, ["+", "<", "o", "*", "x"][i % 5]) for i, algo in enumerate(unique_algorithms)) - faded = dict((algo, (r, g, b, 0.3)) for algo, (r, g, b, a) in colors.items()) - return dict((algo, (colors[algo], faded[algo], linestyles[algo], markerstyles[algo])) for algo in unique_algorithms) - - -def get_up_down(metric): - if metric["worst"] == float("inf"): - return "down" - return "up" - - -def get_left_right(metric): - if metric["worst"] == float("inf"): - return "left" - return "right" - - -def create_pointset(data, xn, yn): - xm, ym = (metrics[xn], metrics[yn]) - rev_y = -1 if ym["worst"] < 0 else 1 - rev_x = -1 if xm["worst"] < 0 else 1 - data.sort(key=lambda t: (rev_y * t[-1], rev_x * t[-2])) - - axs, ays, als, aidxs = [], [], [], [] - # Generate Pareto frontier - xs, ys, ls, idxs = [], [], [], [] - last_x = xm["worst"] - comparator = (lambda xv, lx: xv > lx) if last_x < 0 else (lambda xv, lx: xv < lx) - for algo_name, index_name, xv, yv in data: - if not xv or not yv: - continue - axs.append(xv) - ays.append(yv) - als.append(algo_name) - aidxs.append(algo_name) - if comparator(xv, last_x): - last_x = xv - xs.append(xv) - ys.append(yv) - ls.append(algo_name) - idxs.append(index_name) - return xs, ys, ls, idxs, axs, ays, als, aidxs - - -def create_plot_search(all_data, raw, x_scale, y_scale, fn_out, linestyles, - dataset, k, batch_size): - xn = "k-nn" - yn = 
"qps" - xm, ym = (metrics[xn], metrics[yn]) - # Now generate each plot - handles = [] - labels = [] - plt.figure(figsize=(12, 9)) - - # Sorting by mean y-value helps aligning plots with labels - def mean_y(algo): - xs, ys, ls, idxs, axs, ays, als, aidxs = create_pointset(all_data[algo], xn, yn) - return -np.log(np.array(ys)).mean() - - # Find range for logit x-scale - min_x, max_x = 1, 0 - for algo in sorted(all_data.keys(), key=mean_y): - xs, ys, ls, idxs, axs, ays, als, aidxs = create_pointset(all_data[algo], xn, yn) - min_x = min([min_x] + [x for x in xs if x > 0]) - max_x = max([max_x] + [x for x in xs if x < 1]) - color, faded, linestyle, marker = linestyles[algo] - (handle,) = plt.plot( - xs, ys, "-", label=algo, color=color, ms=7, mew=3, lw=3, marker=marker - ) - handles.append(handle) - if raw: - (handle2,) = plt.plot( - axs, ays, "-", label=algo, color=faded, ms=5, mew=2, lw=2, marker=marker - ) - labels.append(algo) - - ax = plt.gca() - ax.set_ylabel(ym["description"]) - ax.set_xlabel(xm["description"]) - # Custom scales of the type --x-scale a3 - if x_scale[0] == "a": - alpha = float(x_scale[1:]) - - def fun(x): - return 1 - (1 - x) ** (1 / alpha) - - def inv_fun(x): - return 1 - (1 - x) ** alpha - - ax.set_xscale("function", functions=(fun, inv_fun)) - if alpha <= 3: - ticks = [inv_fun(x) for x in np.arange(0, 1.2, 0.2)] - plt.xticks(ticks) - if alpha > 3: - from matplotlib import ticker - - ax.xaxis.set_major_formatter(ticker.LogitFormatter()) - # plt.xticks(ticker.LogitLocator().tick_values(min_x, max_x)) - plt.xticks([0, 1 / 2, 1 - 1e-1, 1 - 1e-2, 1 - 1e-3, 1 - 1e-4, 1]) - # Other x-scales - else: - ax.set_xscale(x_scale) - ax.set_yscale(y_scale) - ax.set_title(f"{dataset} k={k} batch_size={batch_size}") - plt.gca().get_position() - # plt.gca().set_position([box.x0, box.y0, box.width * 0.8, box.height]) - ax.legend(handles, labels, loc="center left", bbox_to_anchor=(1, 0.5), prop={"size": 9}) - plt.grid(visible=True, which="major", color="0.65", linestyle="-") - plt.setp(ax.get_xminorticklabels(), visible=True) - - # Logit scale has to be a subset of (0,1) - if "lim" in xm and x_scale != "logit": - x0, x1 = xm["lim"] - plt.xlim(max(x0, 0), min(x1, 1)) - elif x_scale == "logit": - plt.xlim(min_x, max_x) - if "lim" in ym: - plt.ylim(ym["lim"]) - - # Workaround for bug https://github.com/matplotlib/matplotlib/issues/6789 - ax.spines["bottom"]._adjust_location() - - print(f"writing search output to {fn_out}") - plt.savefig(fn_out, bbox_inches="tight") - plt.close() - - -def create_plot_build(build_results, search_results, linestyles, fn_out, - dataset, k, batch_size): - xn = "k-nn" - yn = "qps" - - qps_85 = [-1] * len(linestyles) - bt_85 = [0] * len(linestyles) - i_85 = [-1] * len(linestyles) - - qps_90 = [-1] * len(linestyles) - bt_90 = [0] * len(linestyles) - i_90 = [-1] * len(linestyles) - - qps_95 = [-1] * len(linestyles) - bt_95 = [0] * len(linestyles) - i_95 = [-1] * len(linestyles) - - data = OrderedDict() - colors = OrderedDict() - - # Sorting by mean y-value helps aligning plots with labels - def mean_y(algo): - xs, ys, ls, idxs, axs, ays, als, aidxs = create_pointset(search_results[algo], xn, yn) - return -np.log(np.array(ys)).mean() - - for pos, algo in enumerate(sorted(search_results.keys(), key=mean_y)): - xs, ys, ls, idxs, axs, ays, als, aidxs = create_pointset(search_results[algo], xn, yn) - # x is recall, y is qps, ls is algo_name, idxs is index_name - for i in range(len(xs)): - if xs[i] >= 0.85 and xs[i] < 0.9 and ys[i] > qps_85[pos]: - qps_85[pos] = 
ys[i] - bt_85[pos] = build_results[(ls[i], idxs[i])][0][2] - i_85[pos] = idxs[i] - elif xs[i] >= 0.9 and xs[i] < 0.95 and ys[i] > qps_90[pos]: - qps_90[pos] = ys[i] - bt_90[pos] = build_results[(ls[i], idxs[i])][0][2] - i_90[pos] = idxs[i] - elif xs[i] >= 0.95 and ys[i] > qps_95[pos]: - qps_95[pos] = ys[i] - bt_95[pos] = build_results[(ls[i], idxs[i])][0][2] - i_95[pos] = idxs[i] - data[algo] = [bt_85[pos], bt_90[pos], bt_95[pos]] - colors[algo] = linestyles[algo][0] - - index = ['@85% Recall', '@90% Recall', '@95% Recall'] - - df = pd.DataFrame(data, index=index) - plt.figure(figsize=(12, 9)) - ax = df.plot.bar(rot=0, color=colors) - fig = ax.get_figure() - print(f"writing build output to {fn_out}") - plt.title("Build Time for Highest QPS") - plt.suptitle(f"{dataset} k={k} batch_size={batch_size}") - plt.ylabel("Build Time (s)") - fig.savefig(fn_out) - - -def load_lines(results_path, result_files, method, index_key): - results = dict() - - for result_filename in result_files: - if result_filename.endswith('.csv'): - with open(os.path.join(results_path, result_filename), 'r') as f: - lines = f.readlines() - lines = lines[:-1] if lines[-1] == "\n" else lines - - if method == "build": - key_idx = [2] - elif method == "search": - key_idx = [2, 3] - - for line in lines[1:]: - split_lines = line.split(',') - - algo_name = split_lines[0] - index_name = split_lines[1] - - if index_key == "algo": - dict_key = algo_name - elif index_key == "index": - dict_key = (algo_name, index_name) - if dict_key not in results: - results[dict_key] = [] - to_add = [algo_name, index_name] - for key_i in key_idx: - to_add.append(float(split_lines[key_i])) - results[dict_key].append(to_add) - - return results - - -def load_all_results(dataset_path, algorithms, k, batch_size, method, index_key): - results_path = os.path.join(dataset_path, "result", method) - result_files = os.listdir(results_path) - result_files = [result_filename for result_filename in result_files \ - if f"{k}-{batch_size}" in result_filename] - if len(algorithms) > 0: - result_files = [result_filename for result_filename in result_files if \ - result_filename.split('-')[0] in algorithms] - - results = load_lines(results_path, result_files, method, index_key) - - return results - - -def main(): - parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument("--dataset", help="dataset to download", - default="glove-100-inner") - parser.add_argument("--dataset-path", help="path to dataset folder", - default=os.path.join(os.getenv("RAFT_HOME"), - "bench", "ann", "data")) - parser.add_argument("--output-filepath", - help="directory for PNG to be saved", - default=os.getcwd()) - parser.add_argument("--algorithms", - help="plot only comma separated list of named \ - algorithms", - default=None) - parser.add_argument( - "-k", "--count", default=10, type=positive_int, help="the number of nearest neighbors to search for" - ) - parser.add_argument( - "-bs", "--batch-size", default=10000, type=positive_int, help="number of query vectors to use in each query trial" - ) - parser.add_argument( - "--build", - action="store_true" - ) - parser.add_argument( - "--search", - action="store_true" - ) - parser.add_argument( - "--x-scale", - help="Scale to use when drawing the X-axis. 
\ - Typically linear, logit or a2", - default="linear" - ) - parser.add_argument( - "--y-scale", - help="Scale to use when drawing the Y-axis", - choices=["linear", "log", "symlog", "logit"], - default="linear", - ) - parser.add_argument( - "--raw", help="Show raw results (not just Pareto frontier) in faded colours", action="store_true" - ) - - args = parser.parse_args() - - if args.algorithms: - algorithms = args.algorithms.split(',') - else: - algorithms = [] - k = args.count - batch_size = args.batch_size - if not args.build and not args.search: - build = True - search = True - else: - build = args.build - search = args.search - - search_output_filepath = os.path.join(args.output_filepath, f"search-{args.dataset}-k{k}-batch_size{batch_size}.png") - build_output_filepath = os.path.join(args.output_filepath, f"build-{args.dataset}-k{k}-batch_size{batch_size}.png") - - search_results = load_all_results( - os.path.join(args.dataset_path, args.dataset), - algorithms, k, batch_size, "search", "algo") - linestyles = create_linestyles(sorted(search_results.keys())) - if search: - create_plot_search(search_results, args.raw, args.x_scale, args.y_scale, - search_output_filepath, linestyles, args.dataset, k, batch_size) - if build: - build_results = load_all_results( - os.path.join(args.dataset_path, args.dataset), - algorithms, k, batch_size, "build", "index") - create_plot_build(build_results, search_results, linestyles, build_output_filepath, - args.dataset, k, batch_size) - - -if __name__ == "__main__": - main() diff --git a/bench/ann/run.py b/bench/ann/run.py deleted file mode 100644 index 2da966cbcd..0000000000 --- a/bench/ann/run.py +++ /dev/null @@ -1,243 +0,0 @@ -# -# Copyright (c) 2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
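
The create_pointset routine in the plotting script above builds the Pareto frontier of (recall, QPS) pairs: results are sorted by QPS in descending order and a point is kept only when its recall improves on every point already kept. A minimal standalone sketch of that selection rule (the function and variable names here are illustrative, not part of the script):

def pareto_frontier(points):
    # points: iterable of (recall, qps) pairs for one algorithm.
    # Sort by QPS descending, then keep a point only if its recall
    # beats the best recall seen so far -- everything else is dominated.
    frontier = []
    best_recall = float("-inf")
    for recall, qps in sorted(points, key=lambda p: p[1], reverse=True):
        if recall > best_recall:
            best_recall = recall
            frontier.append((recall, qps))
    return frontier

# Example: the (0.85, 2e4) point is dominated by (0.90, 3e4) and dropped.
# pareto_frontier([(0.80, 5e4), (0.90, 3e4), (0.85, 2e4)])
#   -> [(0.80, 50000.0), (0.90, 30000.0)]
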
- -import argparse -import json -import os -import subprocess -import yaml - - -def positive_int(input_str: str) -> int: - try: - i = int(input_str) - if i < 1: - raise ValueError - except ValueError: - raise argparse.ArgumentTypeError(f"{input_str} is not a positive integer") - - return i - -def validate_algorithm(algos_conf, algo): - algos_conf_keys = set(algos_conf.keys()) - return algo in algos_conf_keys and not algos_conf[algo]["disabled"] - - -def find_executable(algos_conf, algo, k, batch_size): - executable = algos_conf[algo]["executable"] - conda_path = os.path.join(os.getenv("CONDA_PREFIX"), "bin", "ann", - executable) - build_path = os.path.join(os.getenv("RAFT_HOME"), "cpp", "build", executable) - if os.path.exists(conda_path): - return (executable, conda_path, f"{algo}-k{k}-batch_size{batch_size}") - elif os.path.exists(build_path): - return (executable, build_path, f"{algo}-k{k}-batch_size{batch_size}") - else: - raise FileNotFoundError(executable) - - -def run_build_and_search(conf_file, conf_filename, conf_filedir, - executables_to_run, dataset_path, force, - build, search, k, batch_size): - for executable, ann_executable_path, algo in executables_to_run.keys(): - # Need to write temporary configuration - temp_conf_filename = f"temporary_{conf_filename}" - temp_conf_filepath = os.path.join(conf_filedir, temp_conf_filename) - with open(temp_conf_filepath, "w") as f: - temp_conf = dict() - temp_conf["dataset"] = conf_file["dataset"] - temp_conf["search_basic_param"] = conf_file["search_basic_param"] - temp_conf["index"] = executables_to_run[(executable, - ann_executable_path, - algo)]["index"] - json.dump(temp_conf, f) - - legacy_result_folder = os.path.join(dataset_path, conf_file['dataset']['name'], 'result') - os.makedirs(legacy_result_folder, exist_ok=True) - if build: - build_folder = os.path.join(legacy_result_folder, "build") - os.makedirs(build_folder, exist_ok=True) - cmd = [ann_executable_path, - "--build", - "--data_prefix="+dataset_path, - "--benchmark_out_format=json", - f"--benchmark_out={os.path.join(build_folder, f'{algo}.json')}"] - if force: - cmd = cmd + ["--overwrite"] - cmd = cmd + [temp_conf_filepath] - print(cmd) - p = subprocess.Popen(cmd) - p.wait() - - if search: - search_folder = os.path.join(legacy_result_folder, "search") - os.makedirs(search_folder, exist_ok=True) - cmd = [ann_executable_path, - "--search", - "--data_prefix="+dataset_path, - "--benchmark_counters_tabular", - "--override_kv=k:%s" % k, - "--override_kv=n_queries:%s" % batch_size, - "--benchmark_out_format=json", - f"--benchmark_out={os.path.join(search_folder, f'{algo}.json')}"] - if force: - cmd = cmd + ["--overwrite"] - cmd = cmd + [temp_conf_filepath] - print(cmd) - p = subprocess.Popen(cmd) - p.wait() - - os.remove(temp_conf_filepath) - - -def main(): - scripts_path = os.path.dirname(os.path.realpath(__file__)) - # Read list of allowed algorithms - with open(f"{scripts_path}/algos.yaml", "r") as f: - algos_conf = yaml.safe_load(f) - - parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - - parser.add_argument( - "-k", "--count", default=10, type=positive_int, help="the number of nearest neighbors to search for" - ) - parser.add_argument( - "-bs", "--batch-size", default=10000, type=positive_int, help="number of query vectors to use in each query trial" - ) - parser.add_argument( - "--configuration", - help="path to configuration file for a dataset", - ) - parser.add_argument( - "--dataset", - help="dataset whose configuration file will be 
used", - default="glove-100-inner" - ) - parser.add_argument( - "--dataset-path", - help="path to dataset folder", - default=os.path.join(os.getenv("RAFT_HOME"), - "bench", "ann", "data") - ) - parser.add_argument( - "--build", - action="store_true" - ) - parser.add_argument( - "--search", - action="store_true" - ) - parser.add_argument("--algorithms", - help="run only comma separated list of named \ - algorithms", - default=None) - parser.add_argument("--indices", - help="run only comma separated list of named indices. \ - parameter `algorithms` is ignored", - default=None) - parser.add_argument("-f", "--force", - help="re-run algorithms even if their results \ - already exist", - action="store_true") - - args = parser.parse_args() - - # If both build and search are not provided, - # run both - if not args.build and not args.search: - build = True - search = True - else: - build = args.build - search = args.search - - k = args.count - batch_size = args.batch_size - - # Read configuration file associated to dataset - if args.configuration: - conf_filepath = args.configuration - elif args.dataset: - conf_filepath = \ - os.path.join(scripts_path, "conf", f"{args.dataset}.json") - else: - raise ValueError("One of parameters `configuration` or \ - `dataset` need to be provided") - conf_filename = conf_filepath.split("/")[-1] - conf_filedir = "/".join(conf_filepath.split("/")[:-1]) - dataset_name = conf_filename.replace(".json", "") - dataset_path = args.dataset_path - if not os.path.exists(conf_filepath): - raise FileNotFoundError(conf_filename) - if not os.path.exists(os.path.join(args.dataset_path, dataset_name)): - raise FileNotFoundError(os.path.join(args.dataset_path, dataset_name)) - - with open(conf_filepath, "r") as f: - conf_file = json.load(f) - - executables_to_run = dict() - # At least one named index should exist in config file - if args.indices: - indices = set(args.indices.split(",")) - # algo associated with index should still be present in algos.yaml - # and enabled - for index in conf_file["index"]: - curr_algo = index["algo"] - if index["name"] in indices and \ - validate_algorithm(algos_conf, curr_algo): - executable_path = find_executable(algos_conf, curr_algo, k, batch_size) - if executable_path not in executables_to_run: - executables_to_run[executable_path] = {"index": []} - executables_to_run[executable_path]["index"].append(index) - - # switch to named algorithms if indices parameter is not supplied - elif args.algorithms: - algorithms = set(args.algorithms.split(",")) - # pick out algorithms from conf file that exist - # and are enabled in algos.yaml - for index in conf_file["index"]: - curr_algo = index["algo"] - if curr_algo in algorithms and \ - validate_algorithm(algos_conf, curr_algo): - executable_path = find_executable(algos_conf, curr_algo, k, batch_size) - if executable_path not in executables_to_run: - executables_to_run[executable_path] = {"index": []} - executables_to_run[executable_path]["index"].append(index) - - # default, try to run all available algorithms - else: - for index in conf_file["index"]: - curr_algo = index["algo"] - if validate_algorithm(algos_conf, curr_algo): - executable_path = find_executable(algos_conf, curr_algo, k, batch_size) - if executable_path not in executables_to_run: - executables_to_run[executable_path] = {"index": []} - executables_to_run[executable_path]["index"].append(index) - - # Replace index to dataset path - for executable_path in executables_to_run: - for pos, index in 
enumerate(executables_to_run[executable_path]["index"]): - index["file"] = os.path.join(dataset_path, dataset_name, "index", index["name"]) - executables_to_run[executable_path]["index"][pos] = index - - run_build_and_search(conf_file, conf_filename, conf_filedir, - executables_to_run, dataset_path, - args.force, build, search, - k, batch_size) - - -if __name__ == "__main__": - main() diff --git a/bench/ann/data_export.py b/python/raft-ann-bench/raft-ann-bench/data_export/__main__.py similarity index 57% rename from bench/ann/data_export.py rename to python/raft-ann-bench/raft-ann-bench/data_export/__main__.py index 33304bc276..afc843140d 100644 --- a/bench/ann/data_export.py +++ b/python/raft-ann-bench/raft-ann-bench/data_export/__main__.py @@ -15,9 +15,10 @@ import argparse -import pandas as pd -import os import json +import os + +import pandas as pd def read_file(dataset, dataset_path, method): @@ -27,35 +28,48 @@ def read_file(dataset, dataset_path, method): with open(os.path.join(dir, file), "r") as f: data = json.load(f) df = pd.DataFrame(data["benchmarks"]) - yield (os.path.join(dir, file), file.split('-')[0], df) + yield (os.path.join(dir, file), file.split("-")[0], df) + def convert_json_to_csv_build(dataset, dataset_path): for file, algo_name, df in read_file(dataset, dataset_path, "build"): - df['name'] = df['name'].str.split('/').str[0] - write = pd.DataFrame({'algo_name' : [algo_name] * len(df), - 'index_name' : df['name'], - 'time' : df['real_time']}) - write.to_csv(file.replace('.json', '.csv'), index=False) + df["name"] = df["name"].str.split("/").str[0] + write = pd.DataFrame( + { + "algo_name": [algo_name] * len(df), + "index_name": df["name"], + "time": df["real_time"], + } + ) + write.to_csv(file.replace(".json", ".csv"), index=False) def convert_json_to_csv_search(dataset, dataset_path): for file, algo_name, df in read_file(dataset, dataset_path, "search"): - df['name'] = df['name'].str.split('/').str[0] - write = pd.DataFrame({'algo_name' : [algo_name] * len(df), - 'index_name' : df['name'], - 'recall' : df['Recall'], - 'qps' : df['items_per_second']}) - write.to_csv(file.replace('.json', '.csv'), index=False) + df["name"] = df["name"].str.split("/").str[0] + write = pd.DataFrame( + { + "algo_name": [algo_name] * len(df), + "index_name": df["name"], + "recall": df["Recall"], + "qps": df["items_per_second"], + } + ) + write.to_csv(file.replace(".json", ".csv"), index=False) def main(): parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument("--dataset", help="dataset to download", - default="glove-100-inner") - parser.add_argument("--dataset-path", help="path to dataset folder", - default=os.path.join(os.getenv("RAFT_HOME"), - "bench", "ann", "data")) + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument( + "--dataset", help="dataset to download", default="glove-100-inner" + ) + parser.add_argument( + "--dataset-path", + help="path to dataset folder", + default=os.path.join(os.getenv("RAFT_HOME"), "bench", "ann", "data"), + ) args = parser.parse_args() convert_json_to_csv_build(args.dataset, args.dataset_path) convert_json_to_csv_search(args.dataset, args.dataset_path) diff --git a/python/raft-ann-bench/raft-ann-bench/plot/__main__.py b/python/raft-ann-bench/raft-ann-bench/plot/__main__.py index bbd88a0d3d..6eca2dfc44 100644 --- a/python/raft-ann-bench/raft-ann-bench/plot/__main__.py +++ b/python/raft-ann-bench/raft-ann-bench/plot/__main__.py @@ -20,16 +20,14 @@ # Licence: 
https://github.com/erikbern/ann-benchmarks/blob/main/LICENSE import argparse -from collections import OrderedDict import itertools -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd import os +from collections import OrderedDict import matplotlib as mpl import matplotlib.pyplot as plt import numpy as np +import pandas as pd mpl.use("Agg") @@ -45,13 +43,16 @@ }, } + def positive_int(input_str: str) -> int: try: i = int(input_str) if i < 1: raise ValueError except ValueError: - raise argparse.ArgumentTypeError(f"{input_str} is not a positive integer") + raise argparse.ArgumentTypeError( + f"{input_str} is not a positive integer" + ) return i @@ -118,7 +119,9 @@ def create_pointset(data, xn, yn): # Generate Pareto frontier xs, ys, ls, idxs = [], [], [], [] last_x = xm["worst"] - comparator = (lambda xv, lx: xv > lx) if last_x < 0 else (lambda xv, lx: xv < lx) + comparator = ( + (lambda xv, lx: xv > lx) if last_x < 0 else (lambda xv, lx: xv < lx) + ) for algo_name, index_name, xv, yv in data: if not xv or not yv: continue @@ -135,8 +138,9 @@ def create_pointset(data, xn, yn): return xs, ys, ls, idxs, axs, ays, als, aidxs -def create_plot_search(all_data, raw, x_scale, y_scale, fn_out, linestyles, - dataset, k, batch_size): +def create_plot_search( + all_data, raw, x_scale, y_scale, fn_out, linestyles, dataset, k, batch_size +): xn = "k-nn" yn = "qps" xm, ym = (metrics[xn], metrics[yn]) @@ -147,13 +151,17 @@ def create_plot_search(all_data, raw, x_scale, y_scale, fn_out, linestyles, # Sorting by mean y-value helps aligning plots with labels def mean_y(algo): - xs, ys, ls, idxs, axs, ays, als, aidxs = create_pointset(all_data[algo], xn, yn) + xs, ys, ls, idxs, axs, ays, als, aidxs = create_pointset( + all_data[algo], xn, yn + ) return -np.log(np.array(ys)).mean() # Find range for logit x-scale min_x, max_x = 1, 0 for algo in sorted(all_data.keys(), key=mean_y): - xs, ys, ls, idxs, axs, ays, als, aidxs = create_pointset(all_data[algo], xn, yn) + xs, ys, ls, idxs, axs, ays, als, aidxs = create_pointset( + all_data[algo], xn, yn + ) min_x = min([min_x] + [x for x in xs if x > 0]) max_x = max([max_x] + [x for x in xs if x < 1]) color, faded, linestyle, marker = linestyles[algo] @@ -240,20 +248,21 @@ def inv_fun(x): plt.close() -def create_plot_build(build_results, search_results, linestyles, fn_out, - dataset, k, batch_size): +def create_plot_build( + build_results, search_results, linestyles, fn_out, dataset, k, batch_size +): xn = "k-nn" yn = "qps" - recall_85 = [-1] * len(linestyles) + # recall_85 = [-1] * len(linestyles) qps_85 = [-1] * len(linestyles) bt_85 = [0] * len(linestyles) i_85 = [-1] * len(linestyles) - recall_90 = [-1] * len(linestyles) + # recall_90 = [-1] * len(linestyles) qps_90 = [-1] * len(linestyles) bt_90 = [0] * len(linestyles) i_90 = [-1] * len(linestyles) - recall_95 = [-1] * len(linestyles) + # recall_95 = [-1] * len(linestyles) qps_95 = [-1] * len(linestyles) bt_95 = [0] * len(linestyles) i_95 = [-1] * len(linestyles) @@ -262,11 +271,15 @@ def create_plot_build(build_results, search_results, linestyles, fn_out, # Sorting by mean y-value helps aligning plots with labels def mean_y(algo): - xs, ys, ls, idxs, axs, ays, als, aidxs = create_pointset(search_results[algo], xn, yn) + xs, ys, ls, idxs, axs, ays, als, aidxs = create_pointset( + search_results[algo], xn, yn + ) return -np.log(np.array(ys)).mean() for pos, algo in enumerate(sorted(search_results.keys(), key=mean_y)): - xs, ys, ls, idxs, axs, ays, als, aidxs = 
create_pointset(search_results[algo], xn, yn) + xs, ys, ls, idxs, axs, ays, als, aidxs = create_pointset( + search_results[algo], xn, yn + ) # x is recall, y is qps, ls is algo_name, idxs is index_name for i in range(len(xs)): if xs[i] >= 0.85 and xs[i] < 0.9 and ys[i] > qps_85[pos]: @@ -284,7 +297,7 @@ def mean_y(algo): data[algo] = [bt_85[pos], bt_90[pos], bt_95[pos]] colors[algo] = linestyles[algo][0] - index = ['@85% Recall', '@90% Recall', '@95% Recall'] + index = ["@85% Recall", "@90% Recall", "@95% Recall"] df = pd.DataFrame(data, index=index) plt.figure(figsize=(12, 9)) @@ -303,54 +316,63 @@ def load_lines(results_path, result_files, method, index_key): linebreaker = "name,iterations" for result_filename in result_files: - with open(os.path.join(results_path, result_filename), 'r') as f: - lines = f.readlines() - lines = lines[:-1] if lines[-1] == "\n" else lines - idx = 0 - for pos, line in enumerate(lines): - if linebreaker in line: - idx = pos - break - - if method == "build": - if "hnswlib" in result_filename: - key_idx = [2] - else: - key_idx = [10] - elif method == "search": - if "hnswlib" in result_filename: - key_idx = [10, 6] - else: - key_idx = [12, 10] - - for line in lines[idx+1:]: - split_lines = line.split(',') - - algo_name = split_lines[0].split('.')[0].strip("\"") - index_name = split_lines[0].split('/')[0].strip("\"") - - if index_key == "algo": - dict_key = algo_name - elif index_key == "index": - dict_key = (algo_name, index_name) - if dict_key not in results: - results[dict_key] = [] - to_add = [algo_name, index_name] - for key_i in key_idx: - to_add.append(float(split_lines[key_i])) - results[dict_key].append(to_add) + if result_filename.endswith(".csv"): + with open(os.path.join(results_path, result_filename), "r") as f: + lines = f.readlines() + lines = lines[:-1] if lines[-1] == "\n" else lines + idx = 0 + for pos, line in enumerate(lines): + if linebreaker in line: + idx = pos + break + + if method == "build": + if "hnswlib" in result_filename: + key_idx = [2] + else: + key_idx = [10] + elif method == "search": + if "hnswlib" in result_filename: + key_idx = [10, 6] + else: + key_idx = [12, 10] + + for line in lines[idx + 1 :]: + split_lines = line.split(",") + + algo_name = split_lines[0].split(".")[0].strip('"') + index_name = split_lines[0].split("/")[0].strip('"') + + if index_key == "algo": + dict_key = algo_name + elif index_key == "index": + dict_key = (algo_name, index_name) + if dict_key not in results: + results[dict_key] = [] + to_add = [algo_name, index_name] + for key_i in key_idx: + to_add.append(float(split_lines[key_i])) + results[dict_key].append(to_add) return results -def load_all_results(dataset_path, algorithms, k, batch_size, method, index_key): +def load_all_results( + dataset_path, algorithms, k, batch_size, method, index_key +): results_path = os.path.join(dataset_path, "result", method) result_files = os.listdir(results_path) - result_files = [result_filename for result_filename in result_files \ - if f"{k}-{batch_size}" in result_filename] + result_files = [ + result_filename + for result_filename in result_files + if f"{k}-{batch_size}" in result_filename + ] if len(algorithms) > 0: - result_files = [result_filename for result_filename in result_files if \ - result_filename.split('-')[0] in algorithms] + result_files = [ + result_filename + for result_filename in result_files + if result_filename.split("-")[0] in algorithms + ] results = load_lines(results_path, result_files, method, index_key) @@ -359,33 +381,43 @@ def 
load_all_results(dataset_path, algorithms, k, batch_size, method, index_key) def main(): parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument("--dataset", help="dataset to download", - default="glove-100-inner") - parser.add_argument("--dataset-path", help="path to dataset folder", - default=os.path.join(os.getenv("RAFT_HOME"), - "bench", "ann", "data")) - parser.add_argument("--output-filepath", - help="directory for PNG to be saved", - default=os.getcwd()) - parser.add_argument("--algorithms", - help="plot only comma separated list of named \ - algorithms", - default=None) + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument( + "--dataset", help="dataset to download", default="glove-100-inner" + ) parser.add_argument( - "-k", "--count", default=10, type=positive_int, help="the number of nearest neighbors to search for" + "--dataset-path", + help="path to dataset folder", + default=os.path.join(os.getenv("RAFT_HOME"), "bench", "ann", "data"), ) parser.add_argument( - "-bs", "--batch-size", default=10000, type=positive_int, help="number of query vectors to use in each query trial" + "--output-filepath", + help="directory for PNG to be saved", + default=os.getcwd(), + ) + parser.add_argument( + "--algorithms", + help="plot only comma separated list of named \ + algorithms", + default=None, ) parser.add_argument( - "--build", - action="store_true" + "-k", + "--count", + default=10, + type=positive_int, + help="the number of nearest neighbors to search for", ) parser.add_argument( - "--search", - action="store_true" + "-bs", + "--batch-size", + default=10000, + type=positive_int, + help="number of query vectors to use in each query trial", ) + parser.add_argument("--build", action="store_true") + parser.add_argument("--search", action="store_true") parser.add_argument( "--x-scale", help="Scale to use when drawing the X-axis. 
\ @@ -407,7 +439,7 @@ def main(): args = parser.parse_args() if args.algorithms: - algorithms = args.algorithms.split(',') + algorithms = args.algorithms.split(",") else: algorithms = [] k = args.count @@ -419,22 +451,52 @@ def main(): build = args.build search = args.search - search_output_filepath = os.path.join(args.output_filepath, f"search-{args.dataset}-{k}-{batch_size}.png") - build_output_filepath = os.path.join(args.output_filepath, f"build-{args.dataset}-{k}-{batch_size}.png") + search_output_filepath = os.path.join( + args.output_filepath, f"search-{args.dataset}-{k}-{batch_size}.png" + ) + build_output_filepath = os.path.join( + args.output_filepath, f"build-{args.dataset}-{k}-{batch_size}.png" + ) search_results = load_all_results( - os.path.join(args.dataset_path, args.dataset), - algorithms, k, batch_size, "search", "algo") + os.path.join(args.dataset_path, args.dataset), + algorithms, + k, + batch_size, + "search", + "algo", + ) linestyles = create_linestyles(sorted(search_results.keys())) if search: - create_plot_search(search_results, args.raw, args.x_scale, args.y_scale, - search_output_filepath, linestyles, args.dataset, k, batch_size) + create_plot_search( + search_results, + args.raw, + args.x_scale, + args.y_scale, + search_output_filepath, + linestyles, + args.dataset, + k, + batch_size, + ) if build: build_results = load_all_results( os.path.join(args.dataset_path, args.dataset), - algorithms, k, batch_size, "build", "index") - create_plot_build(build_results, search_results, linestyles, build_output_filepath, - args.dataset, k, batch_size) + algorithms, + k, + batch_size, + "build", + "index", + ) + create_plot_build( + build_results, + search_results, + linestyles, + build_output_filepath, + args.dataset, + k, + batch_size, + ) if __name__ == "__main__": diff --git a/python/raft-ann-bench/raft-ann-bench/run/__main__.py b/python/raft-ann-bench/raft-ann-bench/run/__main__.py index 4836bd1393..ad7c0c358b 100644 --- a/python/raft-ann-bench/raft-ann-bench/run/__main__.py +++ b/python/raft-ann-bench/raft-ann-bench/run/__main__.py @@ -95,8 +95,9 @@ def run_build_and_search( ann_executable_path, "--build", "--data_prefix=" + dataset_path, - "--benchmark_out_format=csv", - f"--benchmark_out={os.path.join(build_folder, f'{algo}.csv')}", + "--benchmark_out_format=json", + "--benchmark_out=" + + f"{os.path.join(build_folder, f'{algo}.json')}", ] if force: cmd = cmd + ["--overwrite"] @@ -116,9 +117,9 @@ def run_build_and_search( "--override_kv=k:%s" % k, "--override_kv=n_queries:%s" % batch_size, "--benchmark_min_warmup_time=0.01", - "--benchmark_out_format=csv", + "--benchmark_out_format=json", "--benchmark_out=" - + f"{os.path.join(search_folder, f'{algo}.csv')}", + + f"{os.path.join(search_folder, f'{algo}.json')}", ] if force: cmd = cmd + ["--overwrite"] From 0dc3ce45a547320b6c558eaeb4e895a02d3e6b89 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Wed, 30 Aug 2023 22:53:39 -0500 Subject: [PATCH 52/70] FIX many improvements and small fixes --- build.sh | 3 +- cpp/CMakeLists.txt | 22 +++++++----- cpp/bench/ann/CMakeLists.txt | 5 ++- cpp/bench/ann/src/common/ann_types.hpp | 5 +-- cpp/bench/ann/src/common/benchmark.hpp | 2 ++ cpp/bench/ann/src/common/util.hpp | 6 ++++ .../ann/src/hnswlib/hnswlib_benchmark.cpp | 2 +- .../src/raft-ann-bench/run/__main__.py | 34 ++++++++++++------- 8 files changed, 50 insertions(+), 29 deletions(-) diff --git a/build.sh b/build.sh index e54d37f4a1..aa4446bcde 100755 --- a/build.sh +++ b/build.sh @@ -347,6 +347,7 @@ if hasArg bench-ann || 
(( ${NUMARGS} == 0 )); then if hasArg --cpu-only; then COMPILE_LIBRARY=OFF CPU_ONLY=ON + NVTX=OFF else COMPILE_LIBRARY=ON fi @@ -498,7 +499,7 @@ fi # Build and (optionally) install the raft-ann-bench Python package if (( ${NUMARGS} == 0 )) || hasArg bench-ann; then - python -m pip install --no-build-isolation --no-deps ${REPODIR}/python/raft-ann-bench + python -m pip install --no-build-isolation --no-deps ${REPODIR}/python/raft-ann-bench -vvv fi if hasArg docs; then diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 1484705996..28ced15afa 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -142,23 +142,27 @@ endif() if(NOT CPU_ONLY) # CUDA runtime rapids_cuda_init_runtime(USE_STATIC ${CUDA_STATIC_RUNTIME}) + # * find CUDAToolkit package + # * determine GPU architectures + # * enable the CMake CUDA language + # * set other CUDA compilation flags + rapids_find_package( + CUDAToolkit REQUIRED + BUILD_EXPORT_SET raft-exports + INSTALL_EXPORT_SET raft-exports + ) +else() + add_definitions(-DCPU_ONLY) endif() + if(NOT DISABLE_OPENMP) find_package(OpenMP) if(OPENMP_FOUND) message(VERBOSE "RAFT: OpenMP found in ${OpenMP_CXX_INCLUDE_DIRS}") endif() endif() -# * find CUDAToolkit package -# * determine GPU architectures -# * enable the CMake CUDA language -# * set other CUDA compilation flags -rapids_find_package( - CUDAToolkit REQUIRED - BUILD_EXPORT_SET raft-exports - INSTALL_EXPORT_SET raft-exports -) + include(cmake/modules/ConfigureCUDA.cmake) # ################################################################################################## diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index e7eb87a29f..e4db48576c 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -128,10 +128,10 @@ function(ConfigureAnnBench) ${BENCH_NAME} PRIVATE raft::raft nlohmann_json::nlohmann_json - $<$:$<$:NCCL::NCCL>> + $<$:$<$:NCCL::NCCL>> ${ConfigureAnnBench_LINKS} Threads::Threads - $<$:${RAFT_CTK_MATH_DEPENDENCIES}> + $<$:${RAFT_CTK_MATH_DEPENDENCIES}> $ $ -static-libgcc @@ -176,7 +176,6 @@ function(ConfigureAnnBench) TARGETS ${BENCH_NAME} COMPONENT ann_bench DESTINATION bin/ann - EXCLUDE_FROM_ALL ) endfunction() diff --git a/cpp/bench/ann/src/common/ann_types.hpp b/cpp/bench/ann/src/common/ann_types.hpp index e0c22d1798..088886b158 100644 --- a/cpp/bench/ann/src/common/ann_types.hpp +++ b/cpp/bench/ann/src/common/ann_types.hpp @@ -1,5 +1,3 @@ - - /* * Copyright (c) 2023, NVIDIA CORPORATION. 
* @@ -24,6 +22,9 @@ #ifndef CPU_ONLY #include // cudaStream_t +#else +typedef size_t cudaStream_t; +typedef size_t cudaEvent_t; #endif namespace raft::bench::ann { diff --git a/cpp/bench/ann/src/common/benchmark.hpp b/cpp/bench/ann/src/common/benchmark.hpp index 5ce453a116..76ac579609 100644 --- a/cpp/bench/ann/src/common/benchmark.hpp +++ b/cpp/bench/ann/src/common/benchmark.hpp @@ -506,7 +506,9 @@ inline auto run_main(int argc, char** argv) -> int return -1; } +#ifndef CPU_ONLY if (!CUDART_FOUND) { log_warn("cudart library is not found, GPU-based indices won't work."); } +#endif Configuration conf(conf_stream); std::string dtype = conf.get_dataset_conf().dtype; diff --git a/cpp/bench/ann/src/common/util.hpp b/cpp/bench/ann/src/common/util.hpp index faf440071d..70a612f1d6 100644 --- a/cpp/bench/ann/src/common/util.hpp +++ b/cpp/bench/ann/src/common/util.hpp @@ -90,9 +90,15 @@ struct buf { struct cuda_timer { private: +#ifndef CPU_ONLY cudaStream_t stream_{nullptr}; cudaEvent_t start_{nullptr}; cudaEvent_t stop_{nullptr}; +#else + cudaStream_t stream_{0}; + cudaEvent_t start_{0}; + cudaEvent_t stop_{0}; +#endif double total_time_{0}; public: diff --git a/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp b/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp index be5b72c5f6..81c27f59ad 100644 --- a/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp +++ b/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp @@ -116,7 +116,7 @@ REGISTER_ALGO_INSTANCE(std::int8_t); REGISTER_ALGO_INSTANCE(std::uint8_t); #ifdef ANN_BENCH_BUILD_MAIN -#define CPU_ONLY +// #define CPU_ONLY #include "../common/benchmark.hpp" int main(int argc, char** argv) { return raft::bench::ann::run_main(argc, argv); } #endif diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py index 01a7c60702..d0885d56f3 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py +++ b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py @@ -41,20 +41,28 @@ def validate_algorithm(algos_conf, algo): def find_executable(algos_conf, algo, k, batch_size): executable = algos_conf[algo]["executable"] - conda_path = os.path.join( - os.getenv("CONDA_PREFIX"), "bin", "ann", executable - ) - build_path = os.path.join( - os.getenv("RAFT_HOME"), "cpp", "build", executable - ) - if conda_path is not None and os.path.exists(conda_path): - print("-- Using RAFT bench found in conda environment: ") - return (executable, conda_path, f"{algo}-{k}-{batch_size}") - elif build_path is not None and os.path.exists(build_path): - print( - f"-- Using RAFT bench from repository specified in {build_path}: " + + build_path = os.getenv("RAFT_HOME") + if build_path is not None: + build_path = os.path.join( + build_path, "cpp", "build", executable ) - return (executable, build_path, f"{algo}-{k}-{batch_size}") + if os.path.exists(build_path): + print( + f"-- Using RAFT bench from repository in {build_path}. " + ) + return (executable, build_path, f"{algo}-{k}-{batch_size}") + + # if there is no build folder present, we look in the conda environment + conda_path = os.getenv("CONDA_PREFIX") + if conda_path is not None: + conda_path = os.path.join( + conda_path, "bin", "ann", executable + ) + if os.path.exists(conda_path): + print("-- Using RAFT bench found in conda environment. 
") + return (executable, conda_path, f"{algo}-{k}-{batch_size}") + else: raise FileNotFoundError(executable) From 5dd7db2587a9d4566ef75a28ee96929b3ef61c78 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Wed, 30 Aug 2023 23:04:16 -0500 Subject: [PATCH 53/70] FIX small fixes from minor errors in prior merges --- .../all_cuda-118_arch-x86_64.yaml | 6 +- .../all_cuda-120_arch-x86_64.yaml | 6 +- cpp/bench/ann/CMakeLists.txt | 2 +- .../ann/src/hnswlib/hnswlib_benchmark.cpp | 4 +- dependencies.yaml | 6 +- .../raft-ann-bench/data_export/__main__.py | 79 ------------------- python/raft-dask/pyproject.toml | 4 +- 7 files changed, 15 insertions(+), 92 deletions(-) delete mode 100644 python/raft-ann-bench/raft-ann-bench/data_export/__main__.py diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 223bafe70b..7e921decd5 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -19,10 +19,10 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- dask-core==2023.7.1 +- dask-core>=2023.7.1 - dask-cuda==23.10.* -- dask==2023.7.1 -- distributed==2023.7.1 +- dask>=2023.7.1 +- distributed>=2023.7.1 - doxygen>=1.8.20 - gcc_linux-64=11.* - gmock>=1.13.0 diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml index e68feaad82..2ea685b529 100644 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ b/conda/environments/all_cuda-120_arch-x86_64.yaml @@ -19,10 +19,10 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- dask-core==2023.7.1 +- dask-core>=2023.7.1 - dask-cuda==23.10.* -- dask==2023.7.1 -- distributed==2023.7.1 +- dask>=2023.7.1 +- distributed>=2023.7.1 - doxygen>=1.8.20 - gcc_linux-64=11.* - gmock>=1.13.0 diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index e4db48576c..c07fa17a69 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -13,7 +13,7 @@ # ============================================================================= # ################################################################################################## -# * benchmark options- ----------------------------------------------------------------------------- +# * benchmark options ------------------------------------------------------------------------------ option(RAFT_ANN_BENCH_USE_FAISS_BFKNN "Include faiss' brute-force knn algorithm in benchmark" ON) option(RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT "Include faiss' ivf flat algorithm in benchmark" ON) diff --git a/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp b/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp index 81c27f59ad..dca84ee774 100644 --- a/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp +++ b/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp @@ -116,7 +116,9 @@ REGISTER_ALGO_INSTANCE(std::int8_t); REGISTER_ALGO_INSTANCE(std::uint8_t); #ifdef ANN_BENCH_BUILD_MAIN -// #define CPU_ONLY +#ifndef CPU_ONLY +#define CPU_ONLY +#endif #include "../common/benchmark.hpp" int main(int argc, char** argv) { return raft::bench::ann::run_main(argc, argv); } #endif diff --git a/dependencies.yaml b/dependencies.yaml index 775ce74c56..6f64287f54 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -326,16 +326,16 @@ dependencies: common: - output_types: [conda, pyproject] packages: - - dask==2023.7.1 + - dask>=2023.7.1 - dask-cuda==23.10.* - - distributed==2023.7.1 + - distributed>=2023.7.1 - joblib>=0.11 - numba>=0.57 - 
*numpy - ucx-py==0.34.* - output_types: conda packages: - - dask-core==2023.7.1 + - dask-core>=2023.7.1 - ucx>=1.13.0 - ucx-proc=*=gpu - output_types: pyproject diff --git a/python/raft-ann-bench/raft-ann-bench/data_export/__main__.py b/python/raft-ann-bench/raft-ann-bench/data_export/__main__.py deleted file mode 100644 index afc843140d..0000000000 --- a/python/raft-ann-bench/raft-ann-bench/data_export/__main__.py +++ /dev/null @@ -1,79 +0,0 @@ -# -# Copyright (c) 2023, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import argparse -import json -import os - -import pandas as pd - - -def read_file(dataset, dataset_path, method): - dir = os.path.join(dataset_path, dataset, "result", method) - for file in os.listdir(dir): - if file.endswith(".json"): - with open(os.path.join(dir, file), "r") as f: - data = json.load(f) - df = pd.DataFrame(data["benchmarks"]) - yield (os.path.join(dir, file), file.split("-")[0], df) - - -def convert_json_to_csv_build(dataset, dataset_path): - for file, algo_name, df in read_file(dataset, dataset_path, "build"): - df["name"] = df["name"].str.split("/").str[0] - write = pd.DataFrame( - { - "algo_name": [algo_name] * len(df), - "index_name": df["name"], - "time": df["real_time"], - } - ) - write.to_csv(file.replace(".json", ".csv"), index=False) - - -def convert_json_to_csv_search(dataset, dataset_path): - for file, algo_name, df in read_file(dataset, dataset_path, "search"): - df["name"] = df["name"].str.split("/").str[0] - write = pd.DataFrame( - { - "algo_name": [algo_name] * len(df), - "index_name": df["name"], - "recall": df["Recall"], - "qps": df["items_per_second"], - } - ) - write.to_csv(file.replace(".json", ".csv"), index=False) - - -def main(): - parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - parser.add_argument( - "--dataset", help="dataset to download", default="glove-100-inner" - ) - parser.add_argument( - "--dataset-path", - help="path to dataset folder", - default=os.path.join(os.getenv("RAFT_HOME"), "bench", "ann", "data"), - ) - args = parser.parse_args() - convert_json_to_csv_build(args.dataset, args.dataset_path) - convert_json_to_csv_search(args.dataset, args.dataset_path) - - -if __name__ == "__main__": - main() diff --git a/python/raft-dask/pyproject.toml b/python/raft-dask/pyproject.toml index 3c81b6f16b..bdbcf61e0f 100644 --- a/python/raft-dask/pyproject.toml +++ b/python/raft-dask/pyproject.toml @@ -35,8 +35,8 @@ license = { text = "Apache 2.0" } requires-python = ">=3.9" dependencies = [ "dask-cuda==23.10.*", - "dask==2023.7.1", - "distributed==2023.7.1", + "dask>=2023.7.1", + "distributed>=2023.7.1", "joblib>=0.11", "numba>=0.57", "numpy>=1.21", From 14bcb921afa21f167f833109b2138338fd65c8eb Mon Sep 17 00:00:00 2001 From: achirkin Date: Thu, 31 Aug 2023 13:34:31 +0200 Subject: [PATCH 54/70] ANN-bench: more flexible cuda_stub.hpp --- cpp/bench/ann/src/common/ann_types.hpp | 6 +-- cpp/bench/ann/src/common/benchmark.hpp | 18 ++----- 
cpp/bench/ann/src/common/cuda_stub.hpp | 68 ++++++++++++++++++++++---- cpp/bench/ann/src/common/util.hpp | 1 + 4 files changed, 66 insertions(+), 27 deletions(-) diff --git a/cpp/bench/ann/src/common/ann_types.hpp b/cpp/bench/ann/src/common/ann_types.hpp index e0c22d1798..bdcfd95b2e 100644 --- a/cpp/bench/ann/src/common/ann_types.hpp +++ b/cpp/bench/ann/src/common/ann_types.hpp @@ -18,14 +18,12 @@ #pragma once +#include "cuda_stub.hpp" // cudaStream_t + #include #include #include -#ifndef CPU_ONLY -#include // cudaStream_t -#endif - namespace raft::bench::ann { enum class MemoryType { diff --git a/cpp/bench/ann/src/common/benchmark.hpp b/cpp/bench/ann/src/common/benchmark.hpp index 5ce453a116..4e91ee0690 100644 --- a/cpp/bench/ann/src/common/benchmark.hpp +++ b/cpp/bench/ann/src/common/benchmark.hpp @@ -33,16 +33,6 @@ #include #include -#ifdef ANN_BENCH_BUILD_MAIN -#ifdef CPU_ONLY -#define CUDART_FOUND false -#else -#define CUDART_FOUND true -#endif -#else -#define CUDART_FOUND (cudart.found()) -#endif - namespace raft::bench::ann { static inline std::unique_ptr current_algo{nullptr}; @@ -255,7 +245,7 @@ void bench_search(::benchmark::State& state, } state.SetItemsProcessed(queries_processed); state.counters.insert({{"k", k}, {"n_queries", n_queries}}); - if (CUDART_FOUND) { + if (cudart.found()) { state.counters.insert({{"GPU Time", gpu_timer.total_time() / state.iterations()}, {"GPU QPS", queries_processed / gpu_timer.total_time()}}); } @@ -357,7 +347,7 @@ void dispatch_benchmark(const Configuration& conf, std::string index_prefix, kv_series override_kv) { - if (CUDART_FOUND) { + if (cudart.found()) { for (auto [key, value] : cuda_info()) { ::benchmark::AddCustomContext(key, value); } @@ -506,7 +496,9 @@ inline auto run_main(int argc, char** argv) -> int return -1; } - if (!CUDART_FOUND) { log_warn("cudart library is not found, GPU-based indices won't work."); } + if (cudart.needed() && !cudart.found()) { + log_warn("cudart library is not found, GPU-based indices won't work."); + } Configuration conf(conf_stream); std::string dtype = conf.get_dataset_conf().dtype; diff --git a/cpp/bench/ann/src/common/cuda_stub.hpp b/cpp/bench/ann/src/common/cuda_stub.hpp index e3f9aa9e84..7183196f31 100644 --- a/cpp/bench/ann/src/common/cuda_stub.hpp +++ b/cpp/bench/ann/src/common/cuda_stub.hpp @@ -15,16 +15,33 @@ */ #pragma once -#ifdef ANN_BENCH_LINK_CUDART +/* +The content of this header is governed by two preprocessor definitions: + + - CPU_ONLY - whether none of the CUDA functions are used. + - ANN_BENCH_LINK_CUDART - dynamically link against this string if defined. 
+ +______________________________________________________________________________ +|CPU_ONLY | ANN_BENCH_LINK_CUDART | cudart | cuda_runtime_api.h | +| | | found | needed | included | +|---------|-----------------------|-----------|---------|--------------------| +| ON | | false | false | NO | +| ON | "cudart.so.xx.xx" | false | false | NO | +| OFF | | true | true | YES | +| OFF | "cudart.so.xx.xx" | | true | YES | +------------------------------------------------------------------------------ +*/ + +#ifndef CPU_ONLY #include +#ifdef ANN_BENCH_LINK_CUDART +#include +#endif #else -#define CPU_ONLY typedef void* cudaStream_t; typedef void* cudaEvent_t; #endif -#include - namespace raft::bench::ann { struct cuda_lib_handle { @@ -37,15 +54,47 @@ struct cuda_lib_handle { } ~cuda_lib_handle() noexcept { +#ifdef ANN_BENCH_LINK_CUDART if (handle != nullptr) { dlclose(handle); } +#endif + } + + template + auto sym(const char* name) -> Symbol + { +#ifdef ANN_BENCH_LINK_CUDART + return reinterpret_cast(dlsym(handle, name)); +#else + return nullptr; +#endif } - [[nodiscard]] inline auto found() const -> bool { return handle != nullptr; } + /** Whether this is NOT a cpu-only package. */ + [[nodiscard]] constexpr inline auto needed() const -> bool + { +#if defined(CPU_ONLY) + return false; +#else + return true; +#endif + } + + /** CUDA found, either at compile time or at runtime. */ + [[nodiscard]] inline auto found() const -> bool + { +#if defined(CPU_ONLY) + return false; +#elif defined(ANN_BENCH_LINK_CUDART) + return handle != nullptr; +#else + return true; +#endif + } }; static inline cuda_lib_handle cudart{}; -#ifndef CPU_ONLY +#ifdef ANN_BENCH_LINK_CUDART namespace stub { [[gnu::weak, gnu::noinline]] cudaError_t cudaMemcpy(void* dst, @@ -130,10 +179,9 @@ namespace stub { } // namespace stub -#define RAFT_DECLARE_CUDART(fun) \ - static inline decltype(&stub::fun) fun = \ - cudart.found() ? reinterpret_cast(dlsym(cudart.handle, #fun)) \ - : &stub::fun +#define RAFT_DECLARE_CUDART(fun) \ + static inline decltype(&stub::fun) fun = \ + cudart.found() ? cudart.sym(#fun) : &stub::fun RAFT_DECLARE_CUDART(cudaMemcpy); RAFT_DECLARE_CUDART(cudaMalloc); diff --git a/cpp/bench/ann/src/common/util.hpp b/cpp/bench/ann/src/common/util.hpp index faf440071d..49c4de1b11 100644 --- a/cpp/bench/ann/src/common/util.hpp +++ b/cpp/bench/ann/src/common/util.hpp @@ -16,6 +16,7 @@ #pragma once #include "ann_types.hpp" +#include "cuda_stub.hpp" // cuda-related utils #ifdef ANN_BENCH_NVTX3_HEADERS_FOUND #include From 50c9fe205f814ae797da14ee90c89f012e334b30 Mon Sep 17 00:00:00 2001 From: achirkin Date: Thu, 31 Aug 2023 14:30:45 +0200 Subject: [PATCH 55/70] Make dlopen more flexible looking for the cudart version to link. 
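
The stub header above binds each cudart symbol either to a weak no-op stub or, when ANN_BENCH_LINK_CUDART is defined, to the address returned by dlsym on a handle obtained with dlopen. The retry loop added in the diff below tries progressively less specific sonames before giving up. A rough ctypes sketch of that fallback order, assuming an example version string (the actual implementation is the C++ dlopen loop in cuda_stub.hpp):

import ctypes

def load_cudart(full_name="libcudart.so.12.2.140"):
    # Candidate names, from most to least specific: full version,
    # major.minor, major.0, major, and finally the unversioned soname.
    stem, _, ver = full_name.partition(".so.")
    major, minor, _patch = (ver.split(".") + ["0", "0"])[:3]
    candidates = [
        full_name,
        f"{stem}.so.{major}.{minor}",
        f"{stem}.so.{major}.0",
        f"{stem}.so.{major}",
        f"{stem}.so",
    ]
    for name in candidates:
        try:
            return ctypes.CDLL(name, mode=ctypes.RTLD_GLOBAL)  # dlopen under the hood
        except OSError:
            continue
    return None  # no cudart found; GPU-based indices stay disabled
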
--- cpp/bench/ann/CMakeLists.txt | 2 +- cpp/bench/ann/src/common/cuda_stub.hpp | 26 +++++++++++++++++++++++++- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index 119a5c0a73..54cddb678e 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -254,7 +254,7 @@ if(RAFT_ANN_BENCH_SINGLE_EXE) target_compile_definitions( ANN_BENCH PRIVATE - $<$:ANN_BENCH_LINK_CUDART="libcudart.so.${CUDAToolkit_VERSION_MAJOR}"> + $<$:ANN_BENCH_LINK_CUDART="libcudart.so.${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}.${CUDAToolkit_VERSION_PATCH}"> $<$:ANN_BENCH_NVTX3_HEADERS_FOUND> ) diff --git a/cpp/bench/ann/src/common/cuda_stub.hpp b/cpp/bench/ann/src/common/cuda_stub.hpp index 7183196f31..7c11194842 100644 --- a/cpp/bench/ann/src/common/cuda_stub.hpp +++ b/cpp/bench/ann/src/common/cuda_stub.hpp @@ -35,6 +35,7 @@ ______________________________________________________________________________ #ifndef CPU_ONLY #include #ifdef ANN_BENCH_LINK_CUDART +#include #include #endif #else @@ -49,7 +50,30 @@ struct cuda_lib_handle { explicit cuda_lib_handle() { #ifdef ANN_BENCH_LINK_CUDART - handle = dlopen(ANN_BENCH_LINK_CUDART, RTLD_NOW | RTLD_GLOBAL | RTLD_DEEPBIND | RTLD_NODELETE); + constexpr int kFlags = RTLD_NOW | RTLD_GLOBAL | RTLD_DEEPBIND | RTLD_NODELETE; + // The full name of the linked cudart library 'cudart.so.MAJOR.MINOR.PATCH' + char libname[] = ANN_BENCH_LINK_CUDART; // NOLINT + handle = dlopen(ANN_BENCH_LINK_CUDART, kFlags); + if (handle != nullptr) { return; } + // try strip the PATCH + auto p = strrchr(libname, '.'); + p[0] = 0; + handle = dlopen(libname, kFlags); + if (handle != nullptr) { return; } + // try set the MINOR version to 0 + p = strrchr(libname, '.'); + p[1] = '0'; + p[2] = 0; + handle = dlopen(libname, kFlags); + if (handle != nullptr) { return; } + // try strip the MINOR + p[0] = 0; + handle = dlopen(libname, kFlags); + if (handle != nullptr) { return; } + // try strip the MAJOR + p = strrchr(libname, '.'); + p[0] = 0; + handle = dlopen(libname, kFlags); #endif } ~cuda_lib_handle() noexcept From c6df11aa95b1cc172d44c23aea24c277b2848349 Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Thu, 31 Aug 2023 11:13:23 -0400 Subject: [PATCH 56/70] Fixing style --- cpp/CMakeLists.txt | 4 ++-- .../src/raft-ann-bench/run/__main__.py | 12 +++--------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 28ced15afa..15a3fb61f2 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -70,7 +70,8 @@ option(RAFT_NVTX "Enable nvtx markers" OFF) set(RAFT_COMPILE_LIBRARY_DEFAULT OFF) if((BUILD_TESTS OR BUILD_PRIMS_BENCH - OR BUILD_ANN_BENCH) + OR BUILD_ANN_BENCH + ) AND NOT CPU_ONLY ) set(RAFT_COMPILE_LIBRARY_DEFAULT ON) @@ -155,7 +156,6 @@ else() add_definitions(-DCPU_ONLY) endif() - if(NOT DISABLE_OPENMP) find_package(OpenMP) if(OPENMP_FOUND) diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py index d0885d56f3..32f6359a47 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py +++ b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py @@ -44,21 +44,15 @@ def find_executable(algos_conf, algo, k, batch_size): build_path = os.getenv("RAFT_HOME") if build_path is not None: - build_path = os.path.join( - build_path, "cpp", "build", executable - ) + build_path = os.path.join(build_path, "cpp", "build", executable) if os.path.exists(build_path): - print( - f"-- Using RAFT bench from repository in {build_path}. " - ) + print(f"-- Using RAFT bench from repository in {build_path}. ") return (executable, build_path, f"{algo}-{k}-{batch_size}") # if there is no build folder present, we look in the conda environment conda_path = os.getenv("CONDA_PREFIX") if conda_path is not None: - conda_path = os.path.join( - conda_path, "bin", "ann", executable - ) + conda_path = os.path.join(conda_path, "bin", "ann", executable) if os.path.exists(conda_path): print("-- Using RAFT bench found in conda environment. ") return (executable, conda_path, f"{algo}-{k}-{batch_size}") From 9ffd68e22ca12bc09caf4a5fe4b8ed4b54650edf Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Thu, 31 Aug 2023 12:00:15 -0400 Subject: [PATCH 57/70] Fixing omp error --- build.sh | 6 ++--- cpp/CMakeLists.txt | 26 +++++++++++-------- cpp/bench/ann/CMakeLists.txt | 13 +++++----- cpp/bench/ann/src/common/cuda_stub.hpp | 10 +++---- cpp/bench/ann/src/common/dataset.hpp | 8 +++--- cpp/bench/ann/src/common/util.hpp | 18 ++++++------- .../ann/src/hnswlib/hnswlib_benchmark.cpp | 3 --- 7 files changed, 43 insertions(+), 41 deletions(-) diff --git a/build.sh b/build.sh index aa4446bcde..4bf59d7879 100755 --- a/build.sh +++ b/build.sh @@ -72,7 +72,7 @@ BUILD_TESTS=OFF BUILD_TYPE=Release BUILD_PRIMS_BENCH=OFF BUILD_ANN_BENCH=OFF -CPU_ONLY=OFF +BUILD_CPU_ONLY=OFF COMPILE_LIBRARY=OFF INSTALL_TARGET=install BUILD_REPORT_METRICS="" @@ -346,7 +346,7 @@ if hasArg bench-ann || (( ${NUMARGS} == 0 )); then CMAKE_TARGET="${CMAKE_TARGET};${ANN_BENCH_TARGETS}" if hasArg --cpu-only; then COMPILE_LIBRARY=OFF - CPU_ONLY=ON + BUILD_CPU_ONLY=ON NVTX=OFF else COMPILE_LIBRARY=ON @@ -423,7 +423,7 @@ if (( ${NUMARGS} == 0 )) || hasArg libraft || hasArg docs || hasArg tests || has -DBUILD_TESTS=${BUILD_TESTS} \ -DBUILD_PRIMS_BENCH=${BUILD_PRIMS_BENCH} \ -DBUILD_ANN_BENCH=${BUILD_ANN_BENCH} \ - -DCPU_ONLY=${CPU_ONLY} \ + -DBUILD_CPU_ONLY=${BUILD_CPU_ONLY} \ -DCMAKE_MESSAGE_LOG_LEVEL=${CMAKE_LOG_LEVEL} \ ${CACHE_ARGS} \ ${EXTRA_CMAKE_ARGS} diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 15a3fb61f2..8e0480b049 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -20,9 +20,9 @@ include(rapids-cpm) include(rapids-export) include(rapids-find) -option(CPU_ONLY "Build CPU only components. Apples to RAFT ANN benchmarks currently" OFF) +option(BUILD_CPU_ONLY "Build CPU only components. Apples to RAFT ANN benchmarks currently" OFF) -if(NOT CPU_ONLY) +if(NOT BUILD_CPU_ONLY) include(rapids-cuda) rapids_cuda_init_architectures(RAFT) endif() @@ -33,7 +33,7 @@ project( LANGUAGES CXX ) -if(NOT CPU_ONLY) +if(NOT BUILD_CPU_ONLY) enable_language(CUDA) endif() @@ -72,7 +72,7 @@ if((BUILD_TESTS OR BUILD_PRIMS_BENCH OR BUILD_ANN_BENCH ) - AND NOT CPU_ONLY + AND NOT BUILD_CPU_ONLY ) set(RAFT_COMPILE_LIBRARY_DEFAULT ON) endif() @@ -80,7 +80,7 @@ option(RAFT_COMPILE_LIBRARY "Enable building raft shared library instantiations" ${RAFT_COMPILE_LIBRARY_DEFAULT} ) -if(CPU_ONLY) +if(BUILD_CPU_ONLY) set(BUILD_SHARED_LIBS OFF) set(BUILD_TESTS OFF) endif() @@ -98,7 +98,7 @@ message(VERBOSE "RAFT: Building optional components: ${raft_FIND_COMPONENTS}") message(VERBOSE "RAFT: Build RAFT unit-tests: ${BUILD_TESTS}") message(VERBOSE "RAFT: Building raft C++ benchmarks: ${BUILD_PRIMS_BENCH}") message(VERBOSE "RAFT: Building ANN benchmarks: ${BUILD_ANN_BENCH}") -message(VERBOSE "RAFT: Build CPU only components: ${CPU_ONLY}") +message(VERBOSE "RAFT: Build CPU only components: ${BUILD_CPU_ONLY}") message(VERBOSE "RAFT: Enable detection of conda environment for dependencies: ${DETECT_CONDA_ENV}") message(VERBOSE "RAFT: Disable depreaction warnings " ${DISABLE_DEPRECATION_WARNINGS}) message(VERBOSE "RAFT: Disable OpenMP: ${DISABLE_OPENMP}") @@ -140,7 +140,7 @@ if(CUDA_STATIC_RUNTIME) set(_ctk_static_suffix "_static") endif() -if(NOT CPU_ONLY) +if(NOT BUILD_CPU_ONLY) # CUDA runtime rapids_cuda_init_runtime(USE_STATIC ${CUDA_STATIC_RUNTIME}) # * find CUDAToolkit package @@ -153,11 +153,15 @@ if(NOT CPU_ONLY) INSTALL_EXPORT_SET raft-exports ) else() - add_definitions(-DCPU_ONLY) + add_definitions(-DBUILD_CPU_ONLY) endif() if(NOT DISABLE_OPENMP) - find_package(OpenMP) + rapids_find_package( + OpenMP REQUIRED + BUILD_EXPORT_SET 
raft-exports + INSTALL_EXPORT_SET raft-exports + ) if(OPENMP_FOUND) message(VERBOSE "RAFT: OpenMP found in ${OpenMP_CXX_INCLUDE_DIRS}") endif() @@ -171,7 +175,7 @@ include(cmake/modules/ConfigureCUDA.cmake) # add third party dependencies using CPM rapids_cpm_init() -if(NOT CPU_ONLY) +if(NOT BUILD_CPU_ONLY) # thrust before rmm/cuco so we get the right version of thrust/cub include(cmake/thirdparty/get_thrust.cmake) include(cmake/thirdparty/get_rmm.cmake) @@ -199,7 +203,7 @@ target_include_directories( raft INTERFACE "$" "$" ) -if(NOT CPU_ONLY) +if(NOT BUILD_CPU_ONLY) # Keep RAFT as lightweight as possible. Only CUDA libs and rmm should be used in global target. target_link_libraries(raft INTERFACE rmm::rmm cuco::cuco nvidia::cutlass::cutlass raft::Thrust) endif() diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index 3f4e3c2992..b224258708 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -32,7 +32,7 @@ option(RAFT_ANN_BENCH_SINGLE_EXE find_package(Threads REQUIRED) -if(CPU_ONLY) +if(BUILD_CPU_ONLY) set(RAFT_ANN_BENCH_USE_FAISS_BFKNN OFF) set(RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT OFF) set(RAFT_ANN_BENCH_USE_FAISS_IVF_PQ OFF) @@ -103,10 +103,6 @@ function(ConfigureAnnBench) set(oneValueArgs NAME) set(multiValueArgs PATH LINKS CXXFLAGS INCLUDES) - if(NOT CPU_ONLY) - set(GPU_BUILD ON) - endif() - cmake_parse_arguments( ConfigureAnnBench "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} ) @@ -124,11 +120,16 @@ function(ConfigureAnnBench) target_link_libraries(${BENCH_NAME} PRIVATE benchmark::benchmark) endif() + if(NOT BUILD_CPU_ONLY) + set(GPU_BUILD ON) + else() + target_compile_definitions(${BENCH_NAME} PUBLIC CPU_ONY) + endif() + target_link_libraries( ${BENCH_NAME} PRIVATE raft::raft nlohmann_json::nlohmann_json - $<$:$<$:NCCL::NCCL>> ${ConfigureAnnBench_LINKS} Threads::Threads $<$:${RAFT_CTK_MATH_DEPENDENCIES}> diff --git a/cpp/bench/ann/src/common/cuda_stub.hpp b/cpp/bench/ann/src/common/cuda_stub.hpp index 7c11194842..b2e3130304 100644 --- a/cpp/bench/ann/src/common/cuda_stub.hpp +++ b/cpp/bench/ann/src/common/cuda_stub.hpp @@ -18,11 +18,11 @@ /* The content of this header is governed by two preprocessor definitions: - - CPU_ONLY - whether none of the CUDA functions are used. + - BUILD_CPU_ONLY - whether none of the CUDA functions are used. - ANN_BENCH_LINK_CUDART - dynamically link against this string if defined. ______________________________________________________________________________ -|CPU_ONLY | ANN_BENCH_LINK_CUDART | cudart | cuda_runtime_api.h | +|BUILD_CPU_ONLY | ANN_BENCH_LINK_CUDART | cudart | cuda_runtime_api.h | | | | found | needed | included | |---------|-----------------------|-----------|---------|--------------------| | ON | | false | false | NO | @@ -32,7 +32,7 @@ ______________________________________________________________________________ ------------------------------------------------------------------------------ */ -#ifndef CPU_ONLY +#ifndef BUILD_CPU_ONLY #include #ifdef ANN_BENCH_LINK_CUDART #include @@ -96,7 +96,7 @@ struct cuda_lib_handle { /** Whether this is NOT a cpu-only package. */ [[nodiscard]] constexpr inline auto needed() const -> bool { -#if defined(CPU_ONLY) +#if defined(BUILD_CPU_ONLY) return false; #else return true; @@ -106,7 +106,7 @@ struct cuda_lib_handle { /** CUDA found, either at compile time or at runtime. 
*/ [[nodiscard]] inline auto found() const -> bool { -#if defined(CPU_ONLY) +#if defined(BUILD_CPU_ONLY) return false; #elif defined(ANN_BENCH_LINK_CUDART) return handle != nullptr; diff --git a/cpp/bench/ann/src/common/dataset.hpp b/cpp/bench/ann/src/common/dataset.hpp index 7fa82a632f..ccc5915b3c 100644 --- a/cpp/bench/ann/src/common/dataset.hpp +++ b/cpp/bench/ann/src/common/dataset.hpp @@ -17,7 +17,7 @@ #include "util.hpp" -#ifndef CPU_ONLY +#ifndef BUILD_CPU_ONLY #include #else typedef uint16_t half; @@ -326,7 +326,7 @@ Dataset::~Dataset() delete[] base_set_; delete[] query_set_; delete[] gt_set_; -#ifndef CPU_ONLY +#ifndef BUILD_CPU_ONLY if (d_base_set_) { cudaFree(d_base_set_); } if (d_query_set_) { cudaFree(d_query_set_); } #endif @@ -335,7 +335,7 @@ Dataset::~Dataset() template const T* Dataset::base_set_on_gpu() const { -#ifndef CPU_ONLY +#ifndef BUILD_CPU_ONLY if (!d_base_set_) { base_set(); cudaMalloc((void**)&d_base_set_, base_set_size() * dim() * sizeof(T)); @@ -348,7 +348,7 @@ const T* Dataset::base_set_on_gpu() const template const T* Dataset::query_set_on_gpu() const { -#ifndef CPU_ONLY +#ifndef BUILD_CPU_ONLY if (!d_query_set_) { query_set(); cudaMalloc((void**)&d_query_set_, query_set_size() * dim() * sizeof(T)); diff --git a/cpp/bench/ann/src/common/util.hpp b/cpp/bench/ann/src/common/util.hpp index 5dfbf7b968..5c9c053057 100644 --- a/cpp/bench/ann/src/common/util.hpp +++ b/cpp/bench/ann/src/common/util.hpp @@ -47,7 +47,7 @@ struct buf { : memory_type(memory_type), size(size), data(nullptr) { switch (memory_type) { -#ifndef CPU_ONLY +#ifndef BUILD_CPU_ONLY case MemoryType::Device: { cudaMalloc(reinterpret_cast(&data), size * sizeof(T)); cudaMemset(data, 0, size * sizeof(T)); @@ -63,7 +63,7 @@ struct buf { { if (data == nullptr) { return; } switch (memory_type) { -#ifndef CPU_ONLY +#ifndef BUILD_CPU_ONLY case MemoryType::Device: { cudaFree(data); } break; @@ -77,7 +77,7 @@ struct buf { [[nodiscard]] auto move(MemoryType target_memory_type) -> buf { buf r{target_memory_type, size}; -#ifndef CPU_ONLY +#ifndef BUILD_CPU_ONLY if ((memory_type == MemoryType::Device && target_memory_type != MemoryType::Device) || (memory_type != MemoryType::Device && target_memory_type == MemoryType::Device)) { cudaMemcpy(r.data, data, size * sizeof(T), cudaMemcpyDefault); @@ -91,7 +91,7 @@ struct buf { struct cuda_timer { private: -#ifndef CPU_ONLY +#ifndef BUILD_CPU_ONLY cudaStream_t stream_{nullptr}; cudaEvent_t start_{nullptr}; cudaEvent_t stop_{nullptr}; @@ -114,7 +114,7 @@ struct cuda_timer { cuda_lap(cudaStream_t stream, cudaEvent_t start, cudaEvent_t stop, double& total_time) : start_(start), stop_(stop), stream_(stream), total_time_(total_time) { -#ifndef CPU_ONLY +#ifndef BUILD_CPU_ONLY cudaStreamSynchronize(stream_); cudaEventRecord(start_, stream_); #endif @@ -123,7 +123,7 @@ struct cuda_timer { ~cuda_lap() noexcept { -#ifndef CPU_ONLY +#ifndef BUILD_CPU_ONLY cudaEventRecord(stop_, stream_); cudaEventSynchronize(stop_); float milliseconds = 0.0f; @@ -135,7 +135,7 @@ struct cuda_timer { cuda_timer() { -#ifndef CPU_ONLY +#ifndef BUILD_CPU_ONLY cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking); cudaEventCreate(&stop_); cudaEventCreate(&start_); @@ -144,7 +144,7 @@ struct cuda_timer { ~cuda_timer() noexcept { -#ifndef CPU_ONLY +#ifndef BUILD_CPU_ONLY cudaEventDestroy(start_); cudaEventDestroy(stop_); cudaStreamDestroy(stream_); @@ -164,7 +164,7 @@ struct cuda_timer { inline auto cuda_info() { std::vector> props; -#ifndef CPU_ONLY +#ifndef BUILD_CPU_ONLY int dev, driver = 0, 
runtime = 0; cudaDriverGetVersion(&driver); cudaRuntimeGetVersion(&runtime); diff --git a/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp b/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp index dca84ee774..7d96e54989 100644 --- a/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp +++ b/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp @@ -116,9 +116,6 @@ REGISTER_ALGO_INSTANCE(std::int8_t); REGISTER_ALGO_INSTANCE(std::uint8_t); #ifdef ANN_BENCH_BUILD_MAIN -#ifndef CPU_ONLY -#define CPU_ONLY -#endif #include "../common/benchmark.hpp" int main(int argc, char** argv) { return raft::bench::ann::run_main(argc, argv); } #endif From 11f353b1ecf5fd50894993d1f0a9a43d9ad5aa2e Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Thu, 31 Aug 2023 15:16:03 -0400 Subject: [PATCH 58/70] FIxing a couple thing in conf files --- ...-angular.json => deep-image-96-inner.json} | 154 +++++++++--------- .../run/conf/gist-960-euclidean.json | 4 +- .../run/conf/glove-100-angular.json | 4 +- .../run/conf/glove-50-angular.json | 4 +- .../run/conf/lastfm-65-angular.json | 4 +- .../run/conf/mnist-784-euclidean.json | 4 +- .../run/conf/nytimes-256-angular.json | 4 +- 7 files changed, 89 insertions(+), 89 deletions(-) rename python/raft-ann-bench/src/raft-ann-bench/run/conf/{deep-image-96-angular.json => deep-image-96-inner.json} (77%) diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-image-96-angular.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-image-96-inner.json similarity index 77% rename from python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-image-96-angular.json rename to python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-image-96-inner.json index 0724d8b09f..f1c033e415 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-image-96-angular.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-image-96-inner.json @@ -1,8 +1,8 @@ { "dataset": { - "name": "deep-image-96-angular", - "base_file": "data/deep-image-96-angular/base.fbin", - "query_file": "data/deep-image-96-angular/query.fbin", + "name": "deep-image-96-inner", + "base_file": "deep-image-96-inner/base.fbin", + "query_file": "deep-image-96-inner/query.fbin", "distance": "euclidean" }, "search_basic_param": { @@ -15,7 +15,7 @@ "name" : "hnswlib.M12", "algo" : "hnswlib", "build_param": {"M":12, "efConstruction":500, "numThreads":32}, - "file" : "index/deep-image-96-angular/hnswlib/M12", + "file" : "index/deep-image-96-inner/hnswlib/M12", "search_params" : [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, @@ -28,13 +28,13 @@ {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} ], - "search_result_file" : "result/deep-image-96-angular/hnswlib/M12" + "search_result_file" : "result/deep-image-96-inner/hnswlib/M12" }, { "name" : "hnswlib.M16", "algo" : "hnswlib", "build_param": {"M":16, "efConstruction":500, "numThreads":32}, - "file" : "index/deep-image-96-angular/hnswlib/M16", + "file" : "index/deep-image-96-inner/hnswlib/M16", "search_params" : [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, @@ -47,13 +47,13 @@ {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} ], - "search_result_file" : "result/deep-image-96-angular/hnswlib/M16" + "search_result_file" : "result/deep-image-96-inner/hnswlib/M16" }, { "name" : "hnswlib.M24", "algo" : "hnswlib", "build_param": {"M":24, "efConstruction":500, "numThreads":32}, - "file" : "index/deep-image-96-angular/hnswlib/M24", + "file" : "index/deep-image-96-inner/hnswlib/M24", "search_params" : [ {"ef":10, "numThreads":1}, {"ef":20, 
"numThreads":1}, @@ -66,13 +66,13 @@ {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} ], - "search_result_file" : "result/deep-image-96-angular/hnswlib/M24" + "search_result_file" : "result/deep-image-96-inner/hnswlib/M24" }, { "name" : "hnswlib.M36", "algo" : "hnswlib", "build_param": {"M":36, "efConstruction":500, "numThreads":32}, - "file" : "index/deep-image-96-angular/hnswlib/M36", + "file" : "index/deep-image-96-inner/hnswlib/M36", "search_params" : [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, @@ -85,7 +85,7 @@ {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} ], - "search_result_file" : "result/deep-image-96-angular/hnswlib/M36" + "search_result_file" : "result/deep-image-96-inner/hnswlib/M36" }, @@ -96,13 +96,13 @@ "algo": "raft_bfknn", "build_param": {}, - "file": "index/deep-image-96-angular/raft_bfknn/bfknn", + "file": "index/deep-image-96-inner/raft_bfknn/bfknn", "search_params": [ { "probe": 1 } ], - "search_result_file": "result/deep-image-96-angular/raft_bfknn/bfknn" + "search_result_file": "result/deep-image-96-inner/raft_bfknn/bfknn" }, { "name": "faiss_ivf_flat.nlist1024", @@ -110,7 +110,7 @@ "build_param": { "nlist": 1024 }, - "file": "index/deep-image-96-angular/faiss_ivf_flat/nlist1024", + "file": "index/deep-image-96-inner/faiss_ivf_flat/nlist1024", "search_params": [ { "nprobe": 1 @@ -137,7 +137,7 @@ "nprobe": 1000 } ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_flat/nlist1024" + "search_result_file": "result/deep-image-96-inner/faiss_ivf_flat/nlist1024" }, { "name": "faiss_ivf_flat.nlist2048", @@ -145,7 +145,7 @@ "build_param": { "nlist": 2048 }, - "file": "index/deep-image-96-angular/faiss_ivf_flat/nlist2048", + "file": "index/deep-image-96-inner/faiss_ivf_flat/nlist2048", "search_params": [ { "nprobe": 1 @@ -172,7 +172,7 @@ "nprobe": 1000 } ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_flat/nlist2048" + "search_result_file": "result/deep-image-96-inner/faiss_ivf_flat/nlist2048" }, { "name": "faiss_ivf_flat.nlist4096", @@ -180,7 +180,7 @@ "build_param": { "nlist": 4096 }, - "file": "index/deep-image-96-angular/faiss_ivf_flat/nlist4096", + "file": "index/deep-image-96-inner/faiss_ivf_flat/nlist4096", "search_params": [ { "nprobe": 1 @@ -207,7 +207,7 @@ "nprobe": 1000 } ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_flat/nlist4096" + "search_result_file": "result/deep-image-96-inner/faiss_ivf_flat/nlist4096" }, { "name": "faiss_ivf_flat.nlist8192", @@ -215,7 +215,7 @@ "build_param": { "nlist": 8192 }, - "file": "index/deep-image-96-angular/faiss_ivf_flat/nlist8192", + "file": "index/deep-image-96-inner/faiss_ivf_flat/nlist8192", "search_params": [ { "nprobe": 1 @@ -242,7 +242,7 @@ "nprobe": 1000 } ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_flat/nlist8192" + "search_result_file": "result/deep-image-96-inner/faiss_ivf_flat/nlist8192" }, { "name": "faiss_ivf_flat.nlist16384", @@ -250,7 +250,7 @@ "build_param": { "nlist": 16384 }, - "file": "index/deep-image-96-angular/faiss_ivf_flat/nlist16384", + "file": "index/deep-image-96-inner/faiss_ivf_flat/nlist16384", "search_params": [ { "nprobe": 1 @@ -280,7 +280,7 @@ "nprobe": 2000 } ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_flat/nlist16384" + "search_result_file": "result/deep-image-96-inner/faiss_ivf_flat/nlist16384" }, { "name": "faiss_ivf_pq.M64-nlist1024", @@ -291,7 +291,7 @@ "useFloat16": true, "usePrecomputed": true }, - "file": "index/deep-image-96-angular/faiss_ivf_pq/M64-nlist1024", + 
"file": "index/deep-image-96-inner/faiss_ivf_pq/M64-nlist1024", "search_params": [ {"nprobe": 10}, {"nprobe": 50}, @@ -300,7 +300,7 @@ {"nprobe": 500}, {"nprobe": 1000} ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_pq/M64-nlist1024" + "search_result_file": "result/deep-image-96-inner/faiss_ivf_pq/M64-nlist1024" }, { "name": "faiss_ivf_pq.M64-nlist1024.noprecomp", @@ -311,7 +311,7 @@ "useFloat16": true, "usePrecomputed": false }, - "file": "index/deep-image-96-angular/faiss_ivf_pq/M64-nlist1024.noprecomp", + "file": "index/deep-image-96-inner/faiss_ivf_pq/M64-nlist1024.noprecomp", "search_params": [ {"nprobe": 10}, {"nprobe": 50}, @@ -320,7 +320,7 @@ {"nprobe": 500}, {"nprobe": 1000} ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_pq/M64-nlist1024" + "search_result_file": "result/deep-image-96-inner/faiss_ivf_pq/M64-nlist1024" }, { "name": "faiss_ivf_sq.nlist1024-fp16", @@ -329,7 +329,7 @@ "nlist": 1024, "quantizer_type": "fp16" }, - "file": "index/deep-image-96-angular/faiss_ivf_sq/nlist1024-fp16", + "file": "index/deep-image-96-inner/faiss_ivf_sq/nlist1024-fp16", "search_params": [ { "nprobe": 1 @@ -356,7 +356,7 @@ "nprobe": 1000 } ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_sq/nlist1024-fp16" + "search_result_file": "result/deep-image-96-inner/faiss_ivf_sq/nlist1024-fp16" }, { "name": "faiss_ivf_sq.nlist2048-fp16", @@ -365,7 +365,7 @@ "nlist": 2048, "quantizer_type": "fp16" }, - "file": "index/deep-image-96-angular/faiss_ivf_sq/nlist2048-fp16", + "file": "index/deep-image-96-inner/faiss_ivf_sq/nlist2048-fp16", "search_params": [ { "nprobe": 1 @@ -392,7 +392,7 @@ "nprobe": 1000 } ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_sq/nlist2048-fp16" + "search_result_file": "result/deep-image-96-inner/faiss_ivf_sq/nlist2048-fp16" }, { "name": "faiss_ivf_sq.nlist4096-fp16", @@ -401,7 +401,7 @@ "nlist": 4096, "quantizer_type": "fp16" }, - "file": "index/deep-image-96-angular/faiss_ivf_sq/nlist4096-fp16", + "file": "index/deep-image-96-inner/faiss_ivf_sq/nlist4096-fp16", "search_params": [ { "nprobe": 1 @@ -428,7 +428,7 @@ "nprobe": 1000 } ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_sq/nlist4096-fp16" + "search_result_file": "result/deep-image-96-inner/faiss_ivf_sq/nlist4096-fp16" }, { "name": "faiss_ivf_sq.nlist8192-fp16", @@ -437,7 +437,7 @@ "nlist": 8192, "quantizer_type": "fp16" }, - "file": "index/deep-image-96-angular/faiss_ivf_sq/nlist8192-fp16", + "file": "index/deep-image-96-inner/faiss_ivf_sq/nlist8192-fp16", "search_params": [ { "nprobe": 1 @@ -464,7 +464,7 @@ "nprobe": 1000 } ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_sq/nlist8192-fp16" + "search_result_file": "result/deep-image-96-inner/faiss_ivf_sq/nlist8192-fp16" }, { "name": "faiss_ivf_sq.nlist16384-fp16", @@ -473,7 +473,7 @@ "nlist": 16384, "quantizer_type": "fp16" }, - "file": "index/deep-image-96-angular/faiss_ivf_sq/nlist16384-fp16", + "file": "index/deep-image-96-inner/faiss_ivf_sq/nlist16384-fp16", "search_params": [ { "nprobe": 1 @@ -503,7 +503,7 @@ "nprobe": 2000 } ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_sq/nlist16384-fp16" + "search_result_file": "result/deep-image-96-inner/faiss_ivf_sq/nlist16384-fp16" }, { "name": "faiss_ivf_sq.nlist1024-int8", @@ -512,7 +512,7 @@ "nlist": 1024, "quantizer_type": "int8" }, - "file": "index/deep-image-96-angular/faiss_ivf_sq/nlist1024-int8", + "file": "index/deep-image-96-inner/faiss_ivf_sq/nlist1024-int8", "search_params": [ { 
"nprobe": 1 @@ -539,7 +539,7 @@ "nprobe": 1000 } ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_sq/nlist1024-int8" + "search_result_file": "result/deep-image-96-inner/faiss_ivf_sq/nlist1024-int8" }, { "name": "faiss_ivf_sq.nlist2048-int8", @@ -548,7 +548,7 @@ "nlist": 2048, "quantizer_type": "int8" }, - "file": "index/deep-image-96-angular/faiss_ivf_sq/nlist2048-int8", + "file": "index/deep-image-96-inner/faiss_ivf_sq/nlist2048-int8", "search_params": [ { "nprobe": 1 @@ -575,7 +575,7 @@ "nprobe": 1000 } ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_sq/nlist2048-int8" + "search_result_file": "result/deep-image-96-inner/faiss_ivf_sq/nlist2048-int8" }, { "name": "faiss_ivf_sq.nlist4096-int8", @@ -584,7 +584,7 @@ "nlist": 4096, "quantizer_type": "int8" }, - "file": "index/deep-image-96-angular/faiss_ivf_sq/nlist4096-int8", + "file": "index/deep-image-96-inner/faiss_ivf_sq/nlist4096-int8", "search_params": [ { "nprobe": 1 @@ -611,7 +611,7 @@ "nprobe": 1000 } ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_sq/nlist4096-int8" + "search_result_file": "result/deep-image-96-inner/faiss_ivf_sq/nlist4096-int8" }, { "name": "faiss_ivf_sq.nlist8192-int8", @@ -620,7 +620,7 @@ "nlist": 8192, "quantizer_type": "int8" }, - "file": "index/deep-image-96-angular/faiss_ivf_sq/nlist8192-int8", + "file": "index/deep-image-96-inner/faiss_ivf_sq/nlist8192-int8", "search_params": [ { "nprobe": 1 @@ -647,7 +647,7 @@ "nprobe": 1000 } ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_sq/nlist8192-int8" + "search_result_file": "result/deep-image-96-inner/faiss_ivf_sq/nlist8192-int8" }, { "name": "faiss_ivf_sq.nlist16384-int8", @@ -656,7 +656,7 @@ "nlist": 16384, "quantizer_type": "int8" }, - "file": "index/deep-image-96-angular/faiss_ivf_sq/nlist16384-int8", + "file": "index/deep-image-96-inner/faiss_ivf_sq/nlist16384-int8", "search_params": [ { "nprobe": 1 @@ -686,17 +686,17 @@ "nprobe": 2000 } ], - "search_result_file": "result/deep-image-96-angular/faiss_ivf_sq/nlist16384-int8" + "search_result_file": "result/deep-image-96-inner/faiss_ivf_sq/nlist16384-int8" }, { "name": "faiss_flat", "algo": "faiss_gpu_flat", "build_param": {}, - "file": "index/deep-image-96-angular/faiss_flat/flat", + "file": "index/deep-image-96-inner/faiss_flat/flat", "search_params": [ {} ], - "search_result_file": "result/deep-image-96-angular/faiss_flat/flat" + "search_result_file": "result/deep-image-96-inner/faiss_flat/flat" }, { @@ -705,7 +705,7 @@ "build_param": {"nlist": 1024, "pq_dim": 128, "ratio": 1, "niter": 25 }, - "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024", + "file": "index/deep-image-96-inner/raft_ivf_pq/dimpq128-cluster1024", "search_params": [ {"nprobe": 10, "internalDistanceDtype": "half", "smemLutDtype": "half"}, {"nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "half"}, @@ -714,7 +714,7 @@ {"nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "half"}, {"nprobe": 1024, "internalDistanceDtype": "half", "smemLutDtype": "half"} ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024" + "search_result_file": "result/deep-image-96-inner/raft_ivf_pq/dimpq128-cluster1024" }, { "name": "raft_ivf_pq.dimpq128-cluster1024-float-float", @@ -726,7 +726,7 @@ "ratio": 1, "niter": 25 }, - "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-float-float", + "file": "index/deep-image-96-inner/raft_ivf_pq/dimpq128-cluster1024-float-float", "search_params": [ {"nprobe": 
1, "internalDistanceDtype": "float", "smemLutDtype": "float"}, {"nprobe": 5, "internalDistanceDtype": "float", "smemLutDtype": "float"}, @@ -737,7 +737,7 @@ {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float"}, {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "float"} ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-float-float" + "search_result_file": "result/deep-image-96-inner/raft_ivf_pq/dimpq128-cluster1024-float-float" }, { "name": "raft_ivf_pq.dimpq128-cluster1024-float-half", @@ -749,7 +749,7 @@ "ratio": 1, "niter": 25 }, - "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-float-half", + "file": "index/deep-image-96-inner/raft_ivf_pq/dimpq128-cluster1024-float-half", "search_params": [ {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "half"}, {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "half"}, @@ -758,7 +758,7 @@ {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "half"}, {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "half"} ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-float-half" + "search_result_file": "result/deep-image-96-inner/raft_ivf_pq/dimpq128-cluster1024-float-half" }, { "name": "raft_ivf_pq.dimpq128-cluster1024-float-fp8", @@ -769,7 +769,7 @@ "ratio": 1, "niter": 25 }, - "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8", + "file": "index/deep-image-96-inner/raft_ivf_pq/dimpq128-cluster1024-float-fp8", "search_params": [ {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, @@ -778,7 +778,7 @@ {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8"} ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-float-fp8" + "search_result_file": "result/deep-image-96-inner/raft_ivf_pq/dimpq128-cluster1024-float-fp8" }, { "name": "raft_ivf_pq.dimpq64-cluster1024-float-fp8", @@ -789,7 +789,7 @@ "ratio": 1, "niter": 25 }, - "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8", + "file": "index/deep-image-96-inner/raft_ivf_pq/dimpq64-cluster1024-float-fp8", "search_params": [ {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, @@ -798,7 +798,7 @@ {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8"} ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq64-cluster1024-float-fp8" + "search_result_file": "result/deep-image-96-inner/raft_ivf_pq/dimpq64-cluster1024-float-fp8" }, { "name": "raft_ivf_pq.dimpq64-cluster1024-float-half", @@ -809,7 +809,7 @@ "ratio": 1, "niter": 25 }, - "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq64-cluster1024-float-half", + "file": "index/deep-image-96-inner/raft_ivf_pq/dimpq64-cluster1024-float-half", "search_params": [ {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "half"}, {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "half"}, @@ -818,7 +818,7 @@ {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "half"}, {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "half"} ], - 
"search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq64-cluster1024-float-half" + "search_result_file": "result/deep-image-96-inner/raft_ivf_pq/dimpq64-cluster1024-float-half" }, { "name": "raft_ivf_pq.dimpq32-cluster1024-float-fp8", @@ -829,7 +829,7 @@ "ratio": 1, "niter": 25 }, - "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8", + "file": "index/deep-image-96-inner/raft_ivf_pq/dimpq32-cluster1024-float-fp8", "search_params": [ {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, @@ -838,7 +838,7 @@ {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8"} ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq32-cluster1024-float-fp8" + "search_result_file": "result/deep-image-96-inner/raft_ivf_pq/dimpq32-cluster1024-float-fp8" }, { "name": "raft_ivf_pq.dimpq16-cluster1024-float-fp8", @@ -850,7 +850,7 @@ "ratio": 1, "niter": 25 }, - "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8", + "file": "index/deep-image-96-inner/raft_ivf_pq/dimpq16-cluster1024-float-fp8", "search_params": [ {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, @@ -859,7 +859,7 @@ {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "fp8"}, {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "fp8"} ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq16-cluster1024-float-fp8" + "search_result_file": "result/deep-image-96-inner/raft_ivf_pq/dimpq16-cluster1024-float-fp8" }, { "name": "raft_ivf_pq.dimpq128-cluster1024-half-float", @@ -870,7 +870,7 @@ "ratio": 1, "niter": 25 }, - "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-half-float", + "file": "index/deep-image-96-inner/raft_ivf_pq/dimpq128-cluster1024-half-float", "search_params": [ {"nprobe": 10, "internalDistanceDtype": "half", "smemLutDtype": "float"}, {"nprobe": 50, "internalDistanceDtype": "half", "smemLutDtype": "float"}, @@ -879,7 +879,7 @@ {"nprobe": 500, "internalDistanceDtype": "half", "smemLutDtype": "float"}, {"nprobe": 1024, "internalDistanceDtype": "half", "smemLutDtype": "float"} ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq128-cluster1024-half-float" + "search_result_file": "result/deep-image-96-inner/raft_ivf_pq/dimpq128-cluster1024-half-float" }, { "name": "raft_ivf_pq.dimpq512-cluster1024-float-float", @@ -890,7 +890,7 @@ "ratio": 1, "niter": 25 }, - "file": "index/deep-image-96-angular/raft_ivf_pq/dimpq512-cluster1024-float-float", + "file": "index/deep-image-96-inner/raft_ivf_pq/dimpq512-cluster1024-float-float", "search_params": [ {"nprobe": 10, "internalDistanceDtype": "float", "smemLutDtype": "float"}, {"nprobe": 50, "internalDistanceDtype": "float", "smemLutDtype": "float"}, @@ -899,7 +899,7 @@ {"nprobe": 500, "internalDistanceDtype": "float", "smemLutDtype": "float"}, {"nprobe": 1024, "internalDistanceDtype": "float", "smemLutDtype": "float"} ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq512-cluster1024-float-float" + "search_result_file": "result/deep-image-96-inner/raft_ivf_pq/dimpq512-cluster1024-float-float" }, { "name": "raft_ivf_flat.nlist1024", @@ -909,7 +909,7 @@ "ratio": 1, "niter": 25 }, - "file": 
"index/deep-image-96-angular/raft_ivf_flat/nlist1024", + "file": "index/deep-image-96-inner/raft_ivf_flat/nlist1024", "search_params": [ { "nprobe": 1 @@ -936,7 +936,7 @@ "nprobe": 1000 } ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_flat/nlist1024" + "search_result_file": "result/deep-image-96-inner/raft_ivf_flat/nlist1024" }, { "name": "raft_ivf_flat.nlist16384", @@ -946,7 +946,7 @@ "ratio": 2, "niter": 20 }, - "file": "index/deep-image-96-angular/raft_ivf_flat/nlist16384", + "file": "index/deep-image-96-inner/raft_ivf_flat/nlist16384", "search_params": [ { "nprobe": 1 @@ -976,7 +976,7 @@ "nprobe": 2000 } ], - "search_result_file": "result/deep-image-96-angular/raft_ivf_flat/nlist16384" + "search_result_file": "result/deep-image-96-inner/raft_ivf_flat/nlist16384" }, { @@ -985,13 +985,13 @@ "build_param": { "graph_degree" : 32 }, - "file" : "index/deep-image-96-angular/raft_cagra/dim32", + "file" : "index/deep-image-96-inner/raft_cagra/dim32", "search_params" : [ {"itopk": 32}, {"itopk": 64}, {"itopk": 128} ], - "search_result_file" : "result/deep-image-96-angular/raft_cagra/dim32" + "search_result_file" : "result/deep-image-96-inner/raft_cagra/dim32" }, { @@ -1000,13 +1000,13 @@ "build_param": { "graph_degree" : 64 }, - "file" : "index/deep-image-96-angular/raft_cagra/dim64", + "file" : "index/deep-image-96-inner/raft_cagra/dim64", "search_params" : [ {"itopk": 32}, {"itopk": 64}, {"itopk": 128} ], - "search_result_file" : "result/deep-image-96-angular/raft_cagra/dim64" + "search_result_file" : "result/deep-image-96-inner/raft_cagra/dim64" } ] } diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/gist-960-euclidean.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/gist-960-euclidean.json index fed7750172..b097aa7ca0 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/gist-960-euclidean.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/gist-960-euclidean.json @@ -1,8 +1,8 @@ { "dataset": { "name": "gist-960-euclidean", - "base_file": "data/gist-960-euclidean/base.fbin", - "query_file": "data/gist-960-euclidean/query.fbin", + "base_file": "gist-960-euclidean/base.fbin", + "query_file": "gist-960-euclidean/query.fbin", "distance": "euclidean" }, "search_basic_param": { diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-angular.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-angular.json index 8c2f8ee617..526aef2db0 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-angular.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-angular.json @@ -1,8 +1,8 @@ { "dataset": { "name": "glove-100-angular", - "base_file": "data/glove-100-angular/base.fbin", - "query_file": "data/glove-100-angular/query.fbin", + "base_file": "glove-100-angular/base.fbin", + "query_file": "glove-100-angular/query.fbin", "distance": "euclidean" }, "search_basic_param": { diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-50-angular.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-50-angular.json index a73ed1ec07..9b3f192c9f 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-50-angular.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-50-angular.json @@ -1,8 +1,8 @@ { "dataset": { "name": "glove-50-angular", - "base_file": "data/glove-50-angular/base.fbin", - "query_file": "data/glove-50-angular/query.fbin", + "base_file": "glove-50-angular/base.fbin", + "query_file": "glove-50-angular/query.fbin", "distance": 
"euclidean" }, "search_basic_param": { diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/lastfm-65-angular.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/lastfm-65-angular.json index b07e682268..e5a4ca6e5f 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/lastfm-65-angular.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/lastfm-65-angular.json @@ -1,8 +1,8 @@ { "dataset": { "name": "lastfm-65-angular", - "base_file": "data/lastfm-65-angular/base.fbin", - "query_file": "data/lastfm-65-angular/query.fbin", + "base_file": "lastfm-65-angular/base.fbin", + "query_file": "lastfm-65-angular/query.fbin", "distance": "euclidean" }, "search_basic_param": { diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/mnist-784-euclidean.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/mnist-784-euclidean.json index 362cc21083..2a493edeed 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/mnist-784-euclidean.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/mnist-784-euclidean.json @@ -1,8 +1,8 @@ { "dataset": { "name": "mnist-784-euclidean", - "base_file": "data/mnist-784-euclidean/base.fbin", - "query_file": "data/mnist-784-euclidean/query.fbin", + "base_file": "mnist-784-euclidean/base.fbin", + "query_file": "mnist-784-euclidean/query.fbin", "distance": "euclidean" }, "search_basic_param": { diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/nytimes-256-angular.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/nytimes-256-angular.json index 4c389bb6b7..630b700ba5 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/nytimes-256-angular.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/nytimes-256-angular.json @@ -1,8 +1,8 @@ { "dataset": { "name": "nytimes-256-angular", - "base_file": "data/nytimes-256-angular/base.fbin", - "query_file": "data/nytimes-256-angular/query.fbin", + "base_file": "nytimes-256-angular/base.fbin", + "query_file": "nytimes-256-angular/query.fbin", "distance": "euclidean" }, "search_basic_param": { From d236090a2898fc3ebb597f297433c9ec8cc88464 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Thu, 31 Aug 2023 22:18:33 -0400 Subject: [PATCH 59/70] Adding data_export --- .../raft-ann-bench/data_export/__main__.py | 88 +++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py diff --git a/python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py b/python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py new file mode 100644 index 0000000000..fd9d00f43c --- /dev/null +++ b/python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py @@ -0,0 +1,88 @@ +# +# Copyright (c) 2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +import argparse +import json +import os + +import pandas as pd + + +def read_file(dataset, dataset_path, method): + dir = os.path.join(dataset_path, dataset, "result", method) + for file in os.listdir(dir): + if file.endswith(".json"): + with open(os.path.join(dir, file), "r") as f: + data = json.load(f) + df = pd.DataFrame(data["benchmarks"]) + yield (os.path.join(dir, file), file.split("-")[0], df) + + +def convert_json_to_csv_build(dataset, dataset_path): + for file, algo_name, df in read_file(dataset, dataset_path, "build"): + df["name"] = df["name"].str.split("/").str[0] + write = pd.DataFrame( + { + "algo_name": [algo_name] * len(df), + "index_name": df["name"], + "time": df["real_time"], + } + ) + write.to_csv(file.replace(".json", ".csv"), index=False) + + +def convert_json_to_csv_search(dataset, dataset_path): + for file, algo_name, df in read_file(dataset, dataset_path, "search"): + df["name"] = df["name"].str.split("/").str[0] + write = pd.DataFrame( + { + "algo_name": [algo_name] * len(df), + "index_name": df["name"], + "recall": df["Recall"], + "qps": df["items_per_second"], + } + ) + write.to_csv(file.replace(".json", ".csv"), index=False) + + +def main(): + + call_path = os.getcwd() + if "RAPIDS_DATASET_ROOT_DIR" in os.environ: + default_dataset_path = os.getenv("RAPIDS_DATASET_ROOT_DIR") + else: + default_dataset_path = os.path.join(call_path, "datasets/") + + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument( + "--dataset", help="dataset to download", default="glove-100-inner" + ) + parser.add_argument( + "--dataset-path", + help="path to dataset folder", + default=default_dataset_path, + ) + + args = parser.parse_args() + + convert_json_to_csv_build(args.dataset, args.dataset_path) + convert_json_to_csv_search(args.dataset, args.dataset_path) + + +if __name__ == "__main__": + main() From 998bf485ec321d860a759a88a75c208fbdd02fc8 Mon Sep 17 00:00:00 2001 From: divyegala Date: Fri, 1 Sep 2023 09:13:07 -0700 Subject: [PATCH 60/70] fix dask pinnings in raft-dask recipe --- conda/recipes/raft-dask/meta.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/conda/recipes/raft-dask/meta.yaml b/conda/recipes/raft-dask/meta.yaml index cf1f8488bc..c9caa4dd9b 100644 --- a/conda/recipes/raft-dask/meta.yaml +++ b/conda/recipes/raft-dask/meta.yaml @@ -60,10 +60,10 @@ requirements: - cudatoolkit {% endif %} - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} - - dask ==2023.7.1 - - dask-core ==2023.7.1 + - dask >=2023.7.1 + - dask-core >=2023.7.1 - dask-cuda ={{ minor_version }} - - distributed ==2023.7.1 + - distributed >=2023.7.1 - joblib >=0.11 - nccl >=2.9.9 - pylibraft {{ version }} From be85537ae15b3e32f8c9e8345358573053a26d5e Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Fri, 1 Sep 2023 12:52:07 -0500 Subject: [PATCH 61/70] FIX PR review feedback and readme updates --- cpp/CMakeLists.txt | 2 +- cpp/bench/ann/CMakeLists.txt | 26 +++++----------- docs/source/raft_ann_benchmarks.md | 20 ++++++------- .../src/raft-ann-bench/run/__main__.py | 17 ++++++----- .../src/raft-ann-bench/run/algos-cpu.yaml | 30 ------------------- .../src/raft-ann-bench/run/algos.yaml | 20 ++++++------- 6 files changed, 36 insertions(+), 79 deletions(-) delete mode 100644 python/raft-ann-bench/src/raft-ann-bench/run/algos-cpu.yaml diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 6dd7ebe21a..5ffb74ef7f 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -20,7 +20,7 @@ 
include(rapids-cpm) include(rapids-export) include(rapids-find) -option(BUILD_CPU_ONLY "Build CPU only components. Apples to RAFT ANN benchmarks currently" OFF) +option(BUILD_CPU_ONLY "Build CPU only components. Applies to RAFT ANN benchmarks currently" OFF) if(NOT BUILD_CPU_ONLY) include(rapids-cuda) diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index b224258708..7f6818acff 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -32,7 +32,7 @@ option(RAFT_ANN_BENCH_SINGLE_EXE find_package(Threads REQUIRED) -if(BUILD_CPU_ONLY) +if(CPU_ONLY) set(RAFT_ANN_BENCH_USE_FAISS_BFKNN OFF) set(RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT OFF) set(RAFT_ANN_BENCH_USE_FAISS_IVF_PQ OFF) @@ -66,17 +66,6 @@ if(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ set(RAFT_ANN_BENCH_USE_RAFT ON) endif() -option(RAFT_ANN_BENCH_USE_MULTIGPU "Use multi-gpus (where possible) in benchmarks" OFF) - -message(VERBOSE "RAFT: Build ann-bench with FAISS_BFKNN: ${RAFT_ANN_BENCH_USE_FAISS_BFKNN}") -message(VERBOSE "RAFT: Build ann-bench with FAISS_IVF_FLAT: ${RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT}") -message(VERBOSE "RAFT: Build ann-bench with FAISS_IVF_PQ: ${RAFT_ANN_BENCH_USE_FAISS_IVF_PQ}") -message(VERBOSE "RAFT: Build ann-bench with RAFT_IVF_FLAT: ${RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT}") -message(VERBOSE "RAFT: Build ann-bench with RAFT_IVF_PQ: ${RAFT_ANN_BENCH_USE_RAFT_IVF_PQ}") -message(VERBOSE "RAFT: Build ann-bench with RAFT_CAGRA: ${RAFT_ANN_BENCH_USE_RAFT_CAGRA}") -message(VERBOSE "RAFT: Build ann-bench with HNSWLIB: ${RAFT_ANN_BENCH_USE_HNSWLIB}") -message(VERBOSE "RAFT: Build ann-bench with GGNN: ${RAFT_ANN_BENCH_USE_GGNN}") -message(VERBOSE "RAFT: Build ann-bench with MULTIGPU: ${RAFT_ANN_BENCH_USE_MULTIGPU}") # ################################################################################################## # * Fetch requirements ------------------------------------------------------------- @@ -103,6 +92,10 @@ function(ConfigureAnnBench) set(oneValueArgs NAME) set(multiValueArgs PATH LINKS CXXFLAGS INCLUDES) + if(NOT BUILD_CPU_ONLY) + set(GPU_BUILD ON) + endif() + cmake_parse_arguments( ConfigureAnnBench "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} ) @@ -120,16 +113,11 @@ function(ConfigureAnnBench) target_link_libraries(${BENCH_NAME} PRIVATE benchmark::benchmark) endif() - if(NOT BUILD_CPU_ONLY) - set(GPU_BUILD ON) - else() - target_compile_definitions(${BENCH_NAME} PUBLIC CPU_ONY) - endif() - target_link_libraries( ${BENCH_NAME} PRIVATE raft::raft nlohmann_json::nlohmann_json + $<$:$<$:NCCL::NCCL>> ${ConfigureAnnBench_LINKS} Threads::Threads $<$:${RAFT_CTK_MATH_DEPENDENCIES}> @@ -290,7 +278,7 @@ if(RAFT_ANN_BENCH_SINGLE_EXE) target_compile_definitions( ANN_BENCH PRIVATE - $<$:ANN_BENCH_LINK_CUDART="libcudart.so.${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}.${CUDAToolkit_VERSION_PATCH}"> + $<$:ANN_BENCH_LINK_CUDART="libcudart.so.${CUDAToolkit_VERSION_MAJOR}"> $<$:ANN_BENCH_NVTX3_HEADERS_FOUND> ) diff --git a/docs/source/raft_ann_benchmarks.md b/docs/source/raft_ann_benchmarks.md index 2475bd8061..cc9a81a384 100644 --- a/docs/source/raft_ann_benchmarks.md +++ b/docs/source/raft_ann_benchmarks.md @@ -4,15 +4,15 @@ This project provides a benchmark program for various ANN search implementations ## Installing the benchmarks -The easiest way to install these benchmarks is through conda. We suggest using mamba as it generally leads to a faster install time:: +The easiest way to install these benchmarks is through conda. 
We provide packages for GPU enabled systems, as well for systems without a GPU. We suggest using mamba as it generally leads to a faster install time: + ```bash -git clone https://github.com/rapidsai/raft.git && cd raft -export RAFT_HOME=$(pwd) -mamba env create --name raft_ann_benchmarks -f conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +mamba env create --name raft_ann_benchmarks conda activate raft_ann_benchmarks -mamba install -c rapidsai -c conda-forge -c nvidia libraft libraft-ann-bench cudatoolkit=11.8* +# to install GPU package: +mamba install -c rapidsai -c conda-forge -c nvidia raft-ann-bench cuda-version=11.8* ``` The channel `rapidsai` can easily be substituted `rapidsai-nightly` if nightly benchmarks are desired. @@ -38,20 +38,18 @@ expected to be defined to run these scripts; this variable holds the directory w The steps below demonstrate how to download, install, and run benchmarks on a subset of 10M vectors from the Yandex Deep-1B dataset. ```bash -export RAFT_HOME=$(pwd) -# All scripts are present in directory raft/bench/ann # (1) prepare dataset -python bench/ann/get_dataset.py --dataset deep-image-96-angular --normalize +python -m raft-ann-bench.get_dataset --dataset deep-image-96-angular --normalize # (2) build and search index -python bench/ann/run.py --dataset deep-image-96-inner +python -m raft-ann-bench.run --dataset deep-image-96-inner # (3) export data -python bench/ann/data_export.py --dataset deep-image-96-inner +python -m raft-ann-bench.data_export --dataset deep-image-96-inner # (4) plot results -python bench/ann/plot.py --dataset deep-image-96-inner +python -m raft-ann-bench.plot --dataset deep-image-96-inner ``` Configuration files already exist for the following list of the million-scale datasets. These all work out-of-the-box with the `--dataset` argument. Other million-scale datasets from `ann-benchmarks.com` will work, but will require a json configuration file to be created in `bench/ann/conf`. 
diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py index 32f6359a47..653763ecb4 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py +++ b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py @@ -34,9 +34,9 @@ def positive_int(input_str: str) -> int: return i -def validate_algorithm(algos_conf, algo): +def validate_algorithm(algos_conf, algo, gpu_present): algos_conf_keys = set(algos_conf.keys()) - return algo in algos_conf_keys and not algos_conf[algo]["disabled"] + return algo in algos_conf_keys and algos_conf[algo]["requires_gpu"] == gpu_present def find_executable(algos_conf, algo, k, batch_size): @@ -136,14 +136,15 @@ def run_build_and_search( def main(): scripts_path = os.path.dirname(os.path.realpath(__file__)) call_path = os.getcwd() + # Read list of allowed algorithms try: import pylibraft # noqa: F401 - - algo_file = "algos.yaml" + gpu_present = True except ImportError: - algo_file = "algos-cpu.yaml" - with open(f"{scripts_path}/{algo_file}", "r") as f: + gpu_present = False + + with open(f"{scripts_path}/algos.yaml", "r") as f: algos_conf = yaml.safe_load(f) if "RAPIDS_DATASET_ROOT_DIR" in os.environ: @@ -252,7 +253,7 @@ def main(): for index in conf_file["index"]: curr_algo = index["algo"] if index["name"] in indices and validate_algorithm( - algos_conf, curr_algo + algos_conf, curr_algo, gpu_present ): executable_path = find_executable( algos_conf, curr_algo, k, batch_size @@ -269,7 +270,7 @@ def main(): for index in conf_file["index"]: curr_algo = index["algo"] if curr_algo in algorithms and validate_algorithm( - algos_conf, curr_algo + algos_conf, curr_algo, gpu_present ): executable_path = find_executable( algos_conf, curr_algo, k, batch_size diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/algos-cpu.yaml b/python/raft-ann-bench/src/raft-ann-bench/run/algos-cpu.yaml deleted file mode 100644 index cb63d0920c..0000000000 --- a/python/raft-ann-bench/src/raft-ann-bench/run/algos-cpu.yaml +++ /dev/null @@ -1,30 +0,0 @@ -faiss_gpu_ivf_flat: - executable: FAISS_IVF_FLAT_ANN_BENCH - disabled: true -faiss_gpu_flat: - executable: FAISS_IVF_FLAT_ANN_BENCH - disabled: true -faiss_gpu_ivf_pq: - executable: FAISS_IVF_PQ_ANN_BENCH - disabled: true -faiss_gpu_ivf_sq: - executable: FAISS_IVF_PQ_ANN_BENCH - disabled: true -faiss_gpu_bfknn: - executable: FAISS_BFKNN_ANN_BENCH - disabled: true -raft_ivf_flat: - executable: RAFT_IVF_FLAT_ANN_BENCH - disabled: true -raft_ivf_pq: - executable: RAFT_IVF_PQ_ANN_BENCH - disabled: true -raft_cagra: - executable: RAFT_CAGRA_ANN_BENCH - disabled: true -ggnn: - executable: GGNN_ANN_BENCH - disabled: true -hnswlib: - executable: HNSWLIB_ANN_BENCH - disabled: false diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/algos.yaml b/python/raft-ann-bench/src/raft-ann-bench/run/algos.yaml index 5f554fc46b..faf0e8673d 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/algos.yaml +++ b/python/raft-ann-bench/src/raft-ann-bench/run/algos.yaml @@ -1,30 +1,30 @@ faiss_gpu_ivf_flat: executable: FAISS_IVF_FLAT_ANN_BENCH - disabled: false + requires_gpu: true faiss_gpu_flat: executable: FAISS_IVF_FLAT_ANN_BENCH - disabled: false + requires_gpu: true faiss_gpu_ivf_pq: executable: FAISS_IVF_PQ_ANN_BENCH - disabled: false + requires_gpu: true faiss_gpu_ivf_sq: executable: FAISS_IVF_PQ_ANN_BENCH - disabled: false + requires_gpu: true faiss_gpu_bfknn: executable: FAISS_BFKNN_ANN_BENCH - disabled: false + requires_gpu: true raft_ivf_flat: executable: 
RAFT_IVF_FLAT_ANN_BENCH - disabled: false + requires_gpu: true raft_ivf_pq: executable: RAFT_IVF_PQ_ANN_BENCH - disabled: false + requires_gpu: true raft_cagra: executable: RAFT_CAGRA_ANN_BENCH - disabled: false + requires_gpu: true ggnn: executable: GGNN_ANN_BENCH - disabled: false + requires_gpu: true hnswlib: executable: HNSWLIB_ANN_BENCH - disabled: false \ No newline at end of file + requires_gpu: true From 076d2de4259eeb00f895dadfdea4fe196fc2481b Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Fri, 1 Sep 2023 13:00:43 -0500 Subject: [PATCH 62/70] DOC doc updates --- docs/source/raft_ann_benchmarks.md | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/docs/source/raft_ann_benchmarks.md b/docs/source/raft_ann_benchmarks.md index cc9a81a384..a547289a5c 100644 --- a/docs/source/raft_ann_benchmarks.md +++ b/docs/source/raft_ann_benchmarks.md @@ -13,8 +13,12 @@ conda activate raft_ann_benchmarks # to install GPU package: mamba install -c rapidsai -c conda-forge -c nvidia raft-ann-bench cuda-version=11.8* + +# to install CPU package for usage in CPU-only systems: +mamba install -c rapidsai -c conda-forge raft-ann-bench-cpu ``` -The channel `rapidsai` can easily be substituted `rapidsai-nightly` if nightly benchmarks are desired. + +The channel `rapidsai` can easily be substituted `rapidsai-nightly` if nightly benchmarks are desired. The CPU package currently allows to run the HNSW benchmarks. Please see the [build instructions](ann_benchmarks_build.md) to build the benchmarks from source. @@ -131,11 +135,11 @@ An entry in `algos.yaml` looks like: ```yaml raft_ivf_pq: executable: RAFT_IVF_PQ_ANN_BENCH - disabled: false + requires_gpu: true ``` `executable` : specifies the name of the binary that will build/search the index. It is assumed to be available in `raft/cpp/build/`. -`disabled` : denotes whether an algorithm should be excluded from benchmark runs. +`requires_gpu` : denotes whether an algorithm requires GPU to run. 
The usage of the script `bench/ann/run.py` is: ```bash From c6014a95361ac6149dd4054b1133b455836af54e Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Fri, 1 Sep 2023 13:13:24 -0500 Subject: [PATCH 63/70] FIX pep8 --- python/raft-ann-bench/src/raft-ann-bench/run/__main__.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py index 653763ecb4..c8324d7a24 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py +++ b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py @@ -36,7 +36,10 @@ def positive_int(input_str: str) -> int: def validate_algorithm(algos_conf, algo, gpu_present): algos_conf_keys = set(algos_conf.keys()) - return algo in algos_conf_keys and algos_conf[algo]["requires_gpu"] == gpu_present + return ( + algo in algos_conf_keys + and algos_conf[algo]["requires_gpu"] == gpu_present + ) def find_executable(algos_conf, algo, k, batch_size): @@ -140,6 +143,7 @@ def main(): # Read list of allowed algorithms try: import pylibraft # noqa: F401 + gpu_present = True except ImportError: gpu_present = False From 5a12ce3365b458f50e62c1bfe02858fd0f29fb02 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Fri, 1 Sep 2023 17:33:48 -0500 Subject: [PATCH 64/70] FIX docs and plot datasets path --- docs/source/raft_ann_benchmarks.md | 42 +++++++++---------- .../src/raft-ann-bench/plot/__main__.py | 12 +++++- .../src/raft-ann-bench/run/__main__.py | 4 +- 3 files changed, 33 insertions(+), 25 deletions(-) diff --git a/docs/source/raft_ann_benchmarks.md b/docs/source/raft_ann_benchmarks.md index a547289a5c..e0c02bb7eb 100644 --- a/docs/source/raft_ann_benchmarks.md +++ b/docs/source/raft_ann_benchmarks.md @@ -39,11 +39,11 @@ expected to be defined to run these scripts; this variable holds the directory w ### End-to-end example: Million-scale -The steps below demonstrate how to download, install, and run benchmarks on a subset of 10M vectors from the Yandex Deep-1B dataset. +The steps below demonstrate how to download, install, and run benchmarks on a subset of 10M vectors from the Yandex Deep-1B dataset By default the datasets will be stored and used from the folder indicated by the RAPIDS_DATASET_ROOT_DIR environment variable if defined, otherwise a datasets subfolder from where the script is being called: ```bash -# (1) prepare dataset +# (1) prepare dataset. python -m raft-ann-bench.get_dataset --dataset deep-image-96-angular --normalize # (2) build and search index @@ -56,7 +56,7 @@ python -m raft-ann-bench.data_export --dataset deep-image-96-inner python -m raft-ann-bench.plot --dataset deep-image-96-inner ``` -Configuration files already exist for the following list of the million-scale datasets. These all work out-of-the-box with the `--dataset` argument. Other million-scale datasets from `ann-benchmarks.com` will work, but will require a json configuration file to be created in `bench/ann/conf`. +Configuration files already exist for the following list of the million-scale datasets. These all work out-of-the-box with the `--dataset` argument. Other million-scale datasets from `ann-benchmarks.com` will work, but will require a json configuration file to be created in `python/raft-ann-bench/src/raft-ann-bench/conf`. 
- `deep-image-96-angular` - `fashion-mnist-784-euclidean` - `glove-50-angular` @@ -67,7 +67,7 @@ Configuration files already exist for the following list of the million-scale da - `sift-128-euclidean` ### End-to-end example: Billion-scale -`bench/ann/get_dataset.py` cannot be used to download the [billion-scale datasets](ann_benchmarks_dataset.md#billion-scale) +`raft-ann-bench.get_dataset` cannot be used to download the [billion-scale datasets](ann_benchmarks_dataset.md#billion-scale) because they are so large. You should instead use our billion-scale datasets guide to download and prepare them. All other python mentioned below work as intended once the billion-scale dataset has been downloaded. @@ -75,27 +75,25 @@ To download Billion-scale datasets, visit [big-ann-benchmarks](http://big-ann-be The steps below demonstrate how to download, install, and run benchmarks on a subset of 100M vectors from the Yandex Deep-1B dataset. Please note that datasets of this scale are recommended for GPUs with larger amounts of memory, such as the A100 or H100. ```bash -export RAFT_HOME=$(pwd) -# All scripts are present in directory raft/bench/ann -mkdir -p bench/ann/data/deep-1B +mkdir -p datasets/deep-1B # (1) prepare dataset # download manually "Ground Truth" file of "Yandex DEEP" # suppose the file name is deep_new_groundtruth.public.10K.bin -python bench/ann/split_groundtruth.py --groundtruth bench/ann/data/deep-1B/deep_new_groundtruth.public.10K.bin +python python -m raft-ann-bench.split_groundtruth --groundtruth datasets/deep-1B/deep_new_groundtruth.public.10K.bin # two files 'groundtruth.neighbors.ibin' and 'groundtruth.distances.fbin' should be produced # (2) build and search index -python bench/ann/run.py --dataset deep-1B +python python -m raft-ann-bench.run --dataset deep-1B # (3) export data -python bench/ann/data_export.py --dataset deep-1B +python python -m raft-ann-bench.data_export --dataset deep-1B # (4) plot results -python bench/ann/plot.py --dataset deep-1B +python python -m raft-ann-bench.plot --dataset deep-1B ``` -The usage of `bench/ann/split-groundtruth.py` is: +The usage of `python -m raft-ann-bench.split-groundtruth` is: ```bash usage: split_groundtruth.py [-h] --groundtruth GROUNDTRUTH @@ -106,7 +104,7 @@ options: ``` ##### Step 1: Prepare Dataset -The script `bench/ann/get_dataset.py` will download and unpack the dataset in directory +The script `raft-ann-bench.get_dataset` will download and unpack the dataset in directory that the user provides. As of now, only million-scale datasets are supported by this script. For more information on [datasets and formats](ann_benchmarks_dataset.md). @@ -118,13 +116,13 @@ options: -h, --help show this help message and exit --dataset DATASET dataset to download (default: glove-100-angular) --dataset-path DATASET_PATH - path to download dataset (default: ${RAFT_HOME}/bench/ann/data) + path to download dataset (default: ${RAPIDS_DATASET_ROOT_DIR}) --normalize normalize cosine distance to inner product (default: False) ``` When option `normalize` is provided to the script, any dataset that has cosine distances will be normalized to inner product. So, for example, the dataset `glove-100-angular` -will be written at location `${RAFT_HOME}/bench/ann/data/glove-100-inner/`. +will be written at location `datasets/glove-100-inner/`. #### Step 2: Build and Search Index The script `bench/ann/run.py` will build and search indices for a given dataset and its @@ -141,7 +139,7 @@ raft_ivf_pq: available in `raft/cpp/build/`. 
`requires_gpu` : denotes whether an algorithm requires GPU to run. -The usage of the script `bench/ann/run.py` is: +The usage of the script `raft-ann-bench.run` is: ```bash usage: run.py [-h] [-k COUNT] [-bs BATCH_SIZE] [--configuration CONFIGURATION] [--dataset DATASET] [--dataset-path DATASET_PATH] [--build] [--search] [--algorithms ALGORITHMS] [--indices INDICES] [-f] @@ -156,7 +154,7 @@ options: path to configuration file for a dataset (default: None) --dataset DATASET dataset whose configuration file will be used (default: glove-100-inner) --dataset-path DATASET_PATH - path to dataset folder (default: ${RAFT_HOME}/bench/ann/data) + path to dataset folder (default: ${RAPIDS_DATASET_ROOT_DIR}) --build --search --algorithms ALGORITHMS @@ -168,7 +166,7 @@ options: `configuration` and `dataset` : `configuration` is a path to a configuration file for a given dataset. The configuration file should be name as `.json`. It is optional if the name of the dataset is provided with the `dataset` argument, in which case -a configuration file will be searched for as `${RAFT_HOME}/bench/ann/conf/.json`. +a configuration file will be searched for as `python/raft-ann-bench/src/raft-ann-bench/run/conf/.json`. For every algorithm run by this script, it outputs an index build statistics JSON file in `/result/build/` and an index search statistics JSON file in `/result/search/`. @@ -184,8 +182,8 @@ it is assumed both are `True`. is available in `algos.yaml` and not disabled, as well as having an associated executable. #### Step 3: Data Export -The script `bench/ann/data_export.py` will convert the intermediate JSON outputs produced by `bench/ann/run.py` to more -easily readable CSV files, which are needed to build charts made by `bench/ann/plot.py`. +The script `bench/ann/data_export.py` will convert the intermediate JSON outputs produced by `raft-ann-bench.run` to more +easily readable CSV files, which are needed to build charts made by `raft-ann-bench.plot`. ```bash usage: data_export.py [-h] [--dataset DATASET] [--dataset-path DATASET_PATH] @@ -194,7 +192,7 @@ options: -h, --help show this help message and exit --dataset DATASET dataset to download (default: glove-100-inner) --dataset-path DATASET_PATH - path to dataset folder (default: ${RAFT_HOME}/bench/ann/data) + path to dataset folder (default: ${RAPIDS_DATASET_ROOT_DIR}) ``` Build statistics CSV file is stored in `/result/build/` and index search statistics CSV file in `/result/search/`. 
@@ -212,7 +210,7 @@ options: -h, --help show this help message and exit --dataset DATASET dataset to download (default: glove-100-inner) --dataset-path DATASET_PATH - path to dataset folder (default: ${RAFT_HOME}/bench/ann/data) + path to dataset folder (default: ${RAPIDS_DATASET_ROOT_DIR}) --output-filepath OUTPUT_FILEPATH directory for PNG to be saved (default: os.getcwd()) --algorithms ALGORITHMS diff --git a/python/raft-ann-bench/src/raft-ann-bench/plot/__main__.py b/python/raft-ann-bench/src/raft-ann-bench/plot/__main__.py index 6eca2dfc44..198d0a2b14 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/plot/__main__.py +++ b/python/raft-ann-bench/src/raft-ann-bench/plot/__main__.py @@ -380,6 +380,12 @@ def load_all_results( def main(): + call_path = os.getcwd() + if "RAPIDS_DATASET_ROOT_DIR" in os.environ: + default_dataset_path = os.getenv("RAPIDS_DATASET_ROOT_DIR") + else: + default_dataset_path = os.path.join(call_path, "datasets/") + parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter ) @@ -388,8 +394,10 @@ def main(): ) parser.add_argument( "--dataset-path", - help="path to dataset folder", - default=os.path.join(os.getenv("RAFT_HOME"), "bench", "ann", "data"), + help="path to dataset folder, by default will look in " + "RAPIDS_DATASET_ROOT_DIR if defined, otherwise a datasets " + "subdirectory from the calling directory", + default=default_dataset_path, ) parser.add_argument( "--output-filepath", diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py index c8324d7a24..65febace8b 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py +++ b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py @@ -185,7 +185,9 @@ def main(): ) parser.add_argument( "--dataset-path", - help="path to dataset folder", + help="path to dataset folder, by default will look in " + "RAPIDS_DATASET_ROOT_DIR if defined, otherwise a datasets " + "subdirectory from the calling directory", default=default_dataset_path, ) parser.add_argument("--build", action="store_true") From fbdc1fa2ea72a51dc6d00ab12846ed64ac5c6c60 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Fri, 1 Sep 2023 18:15:56 -0500 Subject: [PATCH 65/70] FIX found typo in cmake --- cpp/bench/ann/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index 7f6818acff..f6bd60f60d 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -32,7 +32,7 @@ option(RAFT_ANN_BENCH_SINGLE_EXE find_package(Threads REQUIRED) -if(CPU_ONLY) +if(BUILD_CPU_ONLY) set(RAFT_ANN_BENCH_USE_FAISS_BFKNN OFF) set(RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT OFF) set(RAFT_ANN_BENCH_USE_FAISS_IVF_PQ OFF) From 954aa87372d3ae21bcf8b4c3f13ae4e1ba541e6d Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Fri, 1 Sep 2023 18:18:47 -0500 Subject: [PATCH 66/70] FIX missing parameter in python --- python/raft-ann-bench/src/raft-ann-bench/run/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py index 65febace8b..7f46304ab8 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py +++ b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py @@ -289,7 +289,7 @@ def main(): else: for index in conf_file["index"]: curr_algo = index["algo"] - if validate_algorithm(algos_conf, curr_algo): + if 
validate_algorithm(algos_conf, curr_algo, gpu_present): executable_path = find_executable( algos_conf, curr_algo, k, batch_size ) From 15b0dc0b9fc7b02a42a305b28a83c23c5ca9b929 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Fri, 1 Sep 2023 18:31:46 -0500 Subject: [PATCH 67/70] FIX correct conditional --- .../raft-ann-bench/src/raft-ann-bench/run/__main__.py | 11 +++++++---- .../raft-ann-bench/src/raft-ann-bench/run/algos.yaml | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py index 7f46304ab8..d5a65ddfb7 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py +++ b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py @@ -36,10 +36,13 @@ def positive_int(input_str: str) -> int: def validate_algorithm(algos_conf, algo, gpu_present): algos_conf_keys = set(algos_conf.keys()) - return ( - algo in algos_conf_keys - and algos_conf[algo]["requires_gpu"] == gpu_present - ) + if gpu_present: + return algo in algos_conf_keys + else: + return ( + algo in algos_conf_keys + and algos_conf[algo]["requires_gpu"] is False + ) def find_executable(algos_conf, algo, k, batch_size): diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/algos.yaml b/python/raft-ann-bench/src/raft-ann-bench/run/algos.yaml index faf0e8673d..30abe0dda6 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/algos.yaml +++ b/python/raft-ann-bench/src/raft-ann-bench/run/algos.yaml @@ -27,4 +27,4 @@ ggnn: requires_gpu: true hnswlib: executable: HNSWLIB_ANN_BENCH - requires_gpu: true + requires_gpu: false From d863ce6e43fd4f7f2eadfcb1f54808adc2cdaa49 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Fri, 1 Sep 2023 19:17:39 -0500 Subject: [PATCH 68/70] FIX for single gpu arch detection in CMake --- build.sh | 2 +- cpp/CMakeLists.txt | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/build.sh b/build.sh index 8844d62510..53566a3857 100755 --- a/build.sh +++ b/build.sh @@ -428,7 +428,7 @@ if (( ${NUMARGS} == 0 )) || hasArg libraft || hasArg docs || hasArg tests || has -DBUILD_TESTS=${BUILD_TESTS} \ -DBUILD_PRIMS_BENCH=${BUILD_PRIMS_BENCH} \ -DBUILD_ANN_BENCH=${BUILD_ANN_BENCH} \ - -DBUILD_CPU_ONLY=${BUILD_CPU_ONLY} \ + -DBUILD_CPU_ONLY=${yea} \ -DCMAKE_MESSAGE_LOG_LEVEL=${CMAKE_LOG_LEVEL} \ ${CACHE_ARGS} \ ${EXTRA_CMAKE_ARGS} diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 5ffb74ef7f..8c13a649f1 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -22,21 +22,21 @@ include(rapids-find) option(BUILD_CPU_ONLY "Build CPU only components. 
Applies to RAFT ANN benchmarks currently" OFF) +# workaround for rapids_cuda_init_architectures not working for arch detection with enable_language(CUDA) +set(lang_list "CXX") + if(NOT BUILD_CPU_ONLY) include(rapids-cuda) rapids_cuda_init_architectures(RAFT) + list(APPEND lang_list "CUDA") endif() project( RAFT VERSION ${RAFT_VERSION} - LANGUAGES CXX + LANGUAGES ${lang_list} ) -if(NOT BUILD_CPU_ONLY) - enable_language(CUDA) -endif() - # Write the version header rapids_cmake_write_version_file(include/raft/version_config.hpp) From 0d60c563d74c304de4781dc83588ac0c16713331 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Fri, 1 Sep 2023 19:56:14 -0500 Subject: [PATCH 69/70] FIX PR review fixes and a {yea} --- build.sh | 4 ++-- ci/build_python.sh | 5 +++-- cpp/CMakeLists.txt | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/build.sh b/build.sh index 53566a3857..071820ba93 100755 --- a/build.sh +++ b/build.sh @@ -154,7 +154,7 @@ function limitTests { # Remove the full LIMIT_TEST_TARGETS argument from list of args so that it passes validArgs function ARGS=${ARGS//--limit-tests=$LIMIT_TEST_TARGETS/} TEST_TARGETS=${LIMIT_TEST_TARGETS} - echo "Limiting tests to $TEST_TARGETS" + echo "Limiting tests to $TEST_TARGETS" fi fi } @@ -428,7 +428,7 @@ if (( ${NUMARGS} == 0 )) || hasArg libraft || hasArg docs || hasArg tests || has -DBUILD_TESTS=${BUILD_TESTS} \ -DBUILD_PRIMS_BENCH=${BUILD_PRIMS_BENCH} \ -DBUILD_ANN_BENCH=${BUILD_ANN_BENCH} \ - -DBUILD_CPU_ONLY=${yea} \ + -DBUILD_CPU_ONLY=${BUILD_CPU_ONLY} \ -DCMAKE_MESSAGE_LOG_LEVEL=${CMAKE_LOG_LEVEL} \ ${CACHE_ARGS} \ ${EXTRA_CMAKE_ARGS} diff --git a/ci/build_python.sh b/ci/build_python.sh index 4f129c60c6..2a31deb46a 100755 --- a/ci/build_python.sh +++ b/ci/build_python.sh @@ -33,9 +33,10 @@ rapids-mamba-retry mambabuild \ --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \ conda/recipes/raft-ann-bench -# Build ann-bench-cpu only in CUDA 12 jobs since it only depends on python +# Build ann-bench-cpu only in CUDA 11 jobs since it only depends on python # version -if [[ ${CUDA_VERSION} == "11.8.0" ]]; then +RAPIDS_CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}" +if [[ ${RAPIDS_CUDA_MAJOR} == "11" ]]; then rapids-mamba-retry mambabuild \ --no-test \ --channel "${CPP_CHANNEL}" \ diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 8c13a649f1..d93b19f784 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -153,7 +153,7 @@ if(NOT BUILD_CPU_ONLY) INSTALL_EXPORT_SET raft-exports ) else() - add_definitions(-DBUILD_CPU_ONLY) + add_compile_definitions(BUILD_CPU_ONLY) endif() if(NOT DISABLE_OPENMP) From 1274b21e3051f0eea38d0b9aaad9e698f1f30347 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Tue, 5 Sep 2023 10:36:17 -0400 Subject: [PATCH 70/70] Update util.hpp Co-authored-by: Artem M. Chirkin <9253178+achirkin@users.noreply.github.com> --- cpp/bench/ann/src/common/util.hpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/cpp/bench/ann/src/common/util.hpp b/cpp/bench/ann/src/common/util.hpp index 5c9c053057..e9e4a9ad21 100644 --- a/cpp/bench/ann/src/common/util.hpp +++ b/cpp/bench/ann/src/common/util.hpp @@ -91,15 +91,9 @@ struct buf { struct cuda_timer { private: -#ifndef BUILD_CPU_ONLY cudaStream_t stream_{nullptr}; cudaEvent_t start_{nullptr}; cudaEvent_t stop_{nullptr}; -#else - cudaStream_t stream_{0}; - cudaEvent_t start_{0}; - cudaEvent_t stop_{0}; -#endif double total_time_{0}; public:
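To make the corrected GPU gating above concrete, here is a self-contained sketch of the updated `validate_algorithm` logic run against a hand-written stand-in for the parsed `algos.yaml` (only the `requires_gpu` flags that appear in this patch are used; the stand-in dict is illustrative, not the real configuration):

```python
def validate_algorithm(algos_conf: dict, algo: str, gpu_present: bool) -> bool:
    algos_conf_keys = set(algos_conf.keys())
    if gpu_present:
        # With a GPU available, any configured algorithm is eligible.
        return algo in algos_conf_keys
    # Without a GPU, only algorithms explicitly marked requires_gpu: false may run.
    return algo in algos_conf_keys and algos_conf[algo]["requires_gpu"] is False


# Hand-written stand-in for the parsed algos.yaml shown in this patch.
algos_conf = {
    "ggnn": {"requires_gpu": True},
    "hnswlib": {"requires_gpu": False},
}

assert validate_algorithm(algos_conf, "hnswlib", gpu_present=False)   # CPU-only host
assert not validate_algorithm(algos_conf, "ggnn", gpu_present=False)  # GPU algo skipped
assert validate_algorithm(algos_conf, "ggnn", gpu_present=True)       # everything eligible
```

The rewritten conditional lets CPU-only algorithms such as `hnswlib` run regardless of whether a GPU is present, while GPU-only algorithms are still skipped on CPU-only hosts.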