Fix ann-bench Documentation #1754

Merged: 8 commits, Aug 22, 2023

Changes from all commits
1 change: 1 addition & 0 deletions README.md
@@ -318,6 +318,7 @@ The [build](docs/source/build.md) instructions contain more details on building

 The folder structure mirrors other RAPIDS repos, with the following folders:
 
+- `bench/ann`: Python scripts for running ANN benchmarks
 - `ci`: Scripts for running CI in PRs
 - `conda`: Conda recipes and development conda environments
 - `cpp`: Source code for C++ libraries.
15 changes: 6 additions & 9 deletions bench/ann/data_export.py
@@ -19,18 +19,18 @@


 def export_results(output_filepath, recompute, groundtruth_filepath,
-                   result_filepaths):
+                   result_filepath):
     print(f"Writing output file to: {output_filepath}")
     ann_bench_scripts_dir = os.path.join(os.getenv("RAFT_HOME"),
                                          "cpp/bench/ann/scripts")
     ann_bench_scripts_path = os.path.join(ann_bench_scripts_dir,
                                           "eval.pl")
     if recompute:
         p = subprocess.Popen([ann_bench_scripts_path, "-f", "-o", output_filepath,
-                              groundtruth_filepath] + result_filepaths)
+                              groundtruth_filepath, result_filepath])
     else:
         p = subprocess.Popen([ann_bench_scripts_path, "-o", output_filepath,
-                              groundtruth_filepath] + result_filepaths)
+                              groundtruth_filepath, result_filepath])
     p.wait()


@@ -51,16 +51,13 @@ def main():
"bench", "ann", "data")
)

args, result_filepaths = parser.parse_known_args()

# if nothing is provided
if len(result_filepaths) == 0:
raise ValueError("No filepaths to results were provided")
args = parser.parse_args()

result_filepath = os.path.join(args.dataset_path, args.dataset, "result")
groundtruth_filepath = os.path.join(args.dataset_path, args.dataset,
"groundtruth.neighbors.ibin")
export_results(args.output, args.recompute, groundtruth_filepath,
result_filepaths)
result_filepath)


if __name__ == "__main__":
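To make the behavioral change concrete: `data_export.py` no longer accepts a trailing list of result filepaths; it derives a single result directory from the dataset name and path. A minimal sketch of the derivation the script now performs (the `glove-100-inner` value is an illustrative example, not part of the diff):

```python
import os

# Sketch of the path derivation data_export.py now performs internally.
dataset_path = os.path.join(os.getenv("RAFT_HOME", "."), "bench", "ann", "data")
dataset = "glove-100-inner"  # illustrative dataset name

result_filepath = os.path.join(dataset_path, dataset, "result")
groundtruth_filepath = os.path.join(dataset_path, dataset,
                                    "groundtruth.neighbors.ibin")

print(result_filepath)       # .../bench/ann/data/glove-100-inner/result
print(groundtruth_filepath)  # .../bench/ann/data/glove-100-inner/groundtruth.neighbors.ibin
```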
10 changes: 8 additions & 2 deletions bench/ann/run.py
@@ -146,8 +146,14 @@ def main():
         conf_file = json.load(f)
 
     # Replace base, query to dataset-path
-    conf_file["dataset"]["base_file"] = os.path.join(dataset_path, "base.fbin")
-    conf_file["dataset"]["query_file"] = os.path.join(dataset_path, "query.fbin")
+    replacement_base_filepath = \
+        os.path.normpath(conf_file["dataset"]["base_file"]).split(os.path.sep)[-1]
+    conf_file["dataset"]["base_file"] = \
+        os.path.join(dataset_path, replacement_base_filepath)
+    replacement_query_filepath = \
+        os.path.normpath(conf_file["dataset"]["query_file"]).split(os.path.sep)[-1]
+    conf_file["dataset"]["query_file"] = \
+        os.path.join(dataset_path, replacement_query_filepath)
     # Ensure base and query files exist for dataset
     if not os.path.exists(conf_file["dataset"]["base_file"]):
         raise FileNotFoundError(conf_file["dataset"]["base_file"])
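The effect of this change: `run.py` previously overwrote the configured base and query files with the hardcoded names `base.fbin`/`query.fbin`; it now keeps whatever filename the configuration specifies and only re-roots it under the dataset path. A standalone sketch of the same logic (the helper name and example paths are illustrative, not part of the script):

```python
import os

def reroot_under_dataset_path(configured_path, dataset_path):
    # Keep only the final filename component of the configured path,
    # then join it onto the user-supplied dataset path -- the same
    # logic as the lines added to run.py above.
    filename = os.path.normpath(configured_path).split(os.path.sep)[-1]
    return os.path.join(dataset_path, filename)

# A config entry like "data/deep-1B/base.1B.fbin" is no longer
# silently renamed to "base.fbin":
print(reroot_under_dataset_path("data/deep-1B/base.1B.fbin", "/datasets/deep-1B"))
# -> /datasets/deep-1B/base.1B.fbin
```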
49 changes: 24 additions & 25 deletions docs/source/raft_ann_benchmarks.md
@@ -6,13 +6,13 @@ This project provides a benchmark program for various ANN search implementations

The easiest way to install these benchmarks is through conda. We suggest using mamba, as it generally leads to faster install times:
```bash
+git clone https://github.com/rapidsai/raft.git && cd raft
+export RAFT_HOME=$(pwd)
+
 mamba env create --name raft_ann_benchmarks -f conda/environments/bench_ann_cuda-118_arch-x86_64.yaml
 conda activate raft_ann_benchmarks
 
 mamba install -c rapidsai -c conda-forge -c nvidia libraft libraft-ann-bench cudatoolkit=11.8*
-
-git clone https://github.com/rapidsai/raft.git && cd raft
-export RAFT_HOME=$(pwd)
```
The channel `rapidsai` can easily be substituted with `rapidsai-nightly` if nightly benchmarks are desired.

@@ -35,50 +35,50 @@ expected to be defined to run these scripts; this variable holds the directory w
### End-to-end example: Million-scale
```bash
export RAFT_HOME=$(pwd)
-# All scripts are present in directory raft/scripts/ann-benchmarks
+# All scripts are present in directory raft/bench/ann

# (1) prepare dataset
-python scripts/ann-benchmarks/get_dataset.py --dataset glove-100-angular --normalize
+python bench/ann/get_dataset.py --dataset glove-100-angular --normalize

# (2) build and search index
-python scripts/ann-benchmarks/run.py --configuration bench/ann/conf/glove-100-inner.json
+python bench/ann/run.py --dataset glove-100-inner

# (3) evaluate results
-python scripts/ann-benchmarks/data_export.py --output out.csv --dataset glove-100-inner result/glove-100-inner/
+python bench/ann/data_export.py --output out.csv --dataset glove-100-inner

# (4) plot results
-python scripts/ann-benchmarks/plot.py --result-csv out.csv
+python bench/ann/plot.py --result-csv out.csv
```

### End-to-end example: Billion-scale
-`scripts/get_dataset.py` cannot be used to download the [billion-scale datasets](ann_benchmarks_dataset.md#billion-scale)
+`bench/ann/get_dataset.py` cannot be used to download the [billion-scale datasets](ann_benchmarks_dataset.md#billion-scale)
because they are so large. You should instead use our billion-scale datasets guide to download and prepare them.
-All other python scripts mentioned below work as intended once the
+All other Python scripts mentioned below work as intended once the
billion-scale dataset has been downloaded.
To download billion-scale datasets, visit [big-ann-benchmarks](http://big-ann-benchmarks.com/neurips21.html).

```bash
export RAFT_HOME=$(pwd)
-# All scripts are present in directory raft/scripts/ann-benchmarks
+# All scripts are present in directory raft/bench/ann

mkdir -p data/deep-1B
# (1) prepare dataset
# download manually "Ground Truth" file of "Yandex DEEP"
# suppose the file name is deep_new_groundtruth.public.10K.bin
-python scripts/ann-benchmarks/split_groundtruth.py --groundtruth data/deep-1B/deep_new_groundtruth.public.10K.bin
+python bench/ann/split_groundtruth.py --groundtruth data/deep-1B/deep_new_groundtruth.public.10K.bin
# two files 'groundtruth.neighbors.ibin' and 'groundtruth.distances.fbin' should be produced

# (2) build and search index
-python scripts/ann-benchmarks/run.py --configuration bench/ann/conf/deep-1B.json
+python bench/ann/run.py --dataset deep-1B

# (3) evaluate results
-python scripts/ann-benchmarks/data_export.py --output out.csv --dataset deep-1B result/deep-1B/
+python bench/ann/data_export.py --output out.csv --dataset deep-1B

# (4) plot results
-python scripts/ann-benchmarks/plot.py --result-csv out.csv
+python bench/ann/plot.py --result-csv out.csv
```

-The usage of `scripts/ann-benchmarks/split-groundtruth.py` is:
+The usage of `bench/ann/split_groundtruth.py` is:
```bash
usage: split_groundtruth.py [-h] --groundtruth GROUNDTRUTH

@@ -89,7 +89,7 @@
options:
```
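For readers who want to inspect the two produced files, here is a hedged sketch of reading them. It assumes the common big-ann-benchmarks binary layout (two `int32` header fields for rows and columns, followed by row-major data); verify this against your actual files before relying on it:

```python
import numpy as np

def read_bin(path, dtype):
    # Assumed layout: int32 rows, int32 cols, then rows*cols values.
    with open(path, "rb") as f:
        rows, cols = np.fromfile(f, dtype=np.int32, count=2)
        return np.fromfile(f, dtype=dtype).reshape(rows, cols)

neighbors = read_bin("data/deep-1B/groundtruth.neighbors.ibin", np.int32)
distances = read_bin("data/deep-1B/groundtruth.distances.fbin", np.float32)
```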

#### Step 1: Prepare Dataset<a id='prep-dataset'></a>
-The script `scripts/ann-benchmarks/get_dataset.py` will download and unpack the dataset in directory
+The script `bench/ann/get_dataset.py` will download and unpack the dataset in the directory
that the user provides. As of now, only million-scale datasets are supported by this
script. For more information, see [datasets and formats](ann_benchmarks_dataset.md).

@@ -107,9 +107,10 @@ options:
When option `normalize` is provided to the script, any dataset that has cosine distances
will be normalized to inner product. So, for example, the dataset `glove-100-angular`
will be written at location `${RAFT_HOME}/bench/ann/data/glove-100-inner/`.
+```
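As an aside on why normalization maps cosine to inner product: once every vector has unit L2 norm, the inner product of two vectors equals their cosine similarity, so an inner-product index produces the same neighbor ranking. A quick illustrative sketch (not part of the benchmark scripts):

```python
import numpy as np

# After L2 normalization, inner product == cosine similarity, which is
# why the "angular" dataset can be served by an inner-product index.
rng = np.random.default_rng(0)
vectors = rng.standard_normal((1000, 100)).astype(np.float32)
normalized = vectors / np.linalg.norm(vectors, axis=1, keepdims=True)

a, b = normalized[0], normalized[1]
inner = float(a @ b)
cosine = float(a @ b / (np.linalg.norm(a) * np.linalg.norm(b)))
assert abs(inner - cosine) < 1e-6
```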

#### Step 2: Build and Search Index
-The script `scripts/ann-benchmarks/run.py` will build and search indices for a given dataset and its
+The script `bench/ann/run.py` will build and search indices for a given dataset and its
specified configuration.
To configure which algorithms are available, we use `algos.yaml`.
To configure building/searching indices for a dataset, look at [index configuration](#json-index-config).
@@ -123,7 +123,7 @@
raft_ivf_pq:
available in `raft/cpp/build/`.
`disabled` : denotes whether an algorithm should be excluded from benchmark runs.

-The usage of the script `scripts/run.py` is:
+The usage of the script `bench/ann/run.py` is:
```bash
usage: run.py [-h] [--configuration CONFIGURATION] [--dataset DATASET] [--build] [--search] [--algorithms ALGORITHMS] [--indices INDICES] [-f]

@@ -161,13 +162,13 @@ it is assumed both are `True`.
is available in `algos.yaml` and not disabled, as well as having an associated executable.
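A hedged sketch of how a runner might consult such an entry — the key names (`executable`, `disabled`) come from the fragment above, while the loading code itself is illustrative, not the script's actual implementation:

```python
import yaml  # PyYAML

# Illustrative only: look up one algorithm's entry and honor its flags.
with open("bench/ann/algos.yaml") as f:
    algos = yaml.safe_load(f)

entry = algos["raft_ivf_pq"]
if not entry.get("disabled", False):
    binary = f"raft/cpp/build/{entry['executable']}"
    print("would benchmark with:", binary)
```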

#### Step 3: Evaluating Results
-The script `scripts/ann-benchmarks/data_export.py` will evaluate results for a dataset whose index has been built
-and search with at least one algorithm. For every result file that is supplied to the script, the output
+The script `bench/ann/data_export.py` will evaluate results for a dataset whose index has been built
+and searched with at least one algorithm. For every result file that is available to the script, the output
will be combined and written to a CSV file.

The usage of this script is:
```bash
-usage: data_export.py [-h] --output OUTPUT [--recompute] [--dataset DATASET] [--dataset-path DATASET_PATH] <result-filepaths>
+usage: data_export.py [-h] --output OUTPUT [--recompute] [--dataset DATASET] [--dataset-path DATASET_PATH]

options:
-h, --help show this help message and exit
@@ -178,10 +179,8 @@ options:
path to dataset folder (default: ${RAFT_HOME}/bench/ann/data)
```

-`result-filepaths` : whitespace delimited list of result files/directories that can be captured via pattern match. For more [information and examples](ann_benchmarks_low_level.html#result-filepath-example)

#### Step 4: Plot Results
-The script `scripts/ann-benchmarks/plot.py` will plot all results evaluated to a CSV file for a given dataset.
+The script `bench/ann/plot.py` will plot all results evaluated to a CSV file for a given dataset.
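For intuition, the typical shape of such a plot is recall on one axis against queries-per-second on the other, one curve per algorithm. A hedged sketch under assumed CSV column names (`algo`, `recall`, `qps`) — the script's real schema may differ:

```python
import pandas as pd
import matplotlib.pyplot as plt

# Column names here are assumptions for illustration, not plot.py's schema.
df = pd.read_csv("out.csv")
for algo, group in df.groupby("algo"):
    plt.plot(group["recall"], group["qps"], marker="o", label=algo)

plt.xlabel("Recall")
plt.ylabel("Queries per second")
plt.yscale("log")
plt.legend()
plt.savefig("plot.png")
```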

The usage of this script is:
```bash