From f73b0efed3138925a06f357bb6d21aaf7ac1bedf Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet" <cjnolet@gmail.com>
Date: Tue, 7 Nov 2023 15:25:01 -0500
Subject: [PATCH 1/3] Adding dry-run option to raft-ann-bench

---
 .../src/raft-ann-bench/run/__main__.py        | 60 +++++++++++++++----
 1 file changed, 49 insertions(+), 11 deletions(-)

diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py
index ac5d83e4c2..9ebbf7dbf4 100644
--- a/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py
+++ b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py
@@ -18,6 +18,7 @@
 import json
 import os
 import subprocess
+import uuid
 from importlib import import_module
 
 import yaml
@@ -80,6 +81,7 @@ def run_build_and_search(
     force,
     build,
     search,
+    dry_run,
     k,
     batch_size,
     search_threads,
@@ -87,16 +89,16 @@ def run_build_and_search(
 ):
     for executable, ann_executable_path, algo in executables_to_run.keys():
         # Need to write temporary configuration
-        temp_conf_filename = f"temporary_{conf_filename}"
-        temp_conf_filepath = os.path.join(conf_filedir, temp_conf_filename)
-        with open(temp_conf_filepath, "w") as f:
+        temp_conf_filename = f"temporary_{conf_filename}_{uuid.uuid1()}.json"
+        with open(temp_conf_filename, "w") as f:
             temp_conf = dict()
             temp_conf["dataset"] = conf_file["dataset"]
             temp_conf["search_basic_param"] = conf_file["search_basic_param"]
             temp_conf["index"] = executables_to_run[
                 (executable, ann_executable_path, algo)
             ]["index"]
-            json.dump(temp_conf, f)
+            json_str = json.dumps(temp_conf, indent=2)
+            f.write(json_str)
 
         legacy_result_folder = os.path.join(
             dataset_path, conf_file["dataset"]["name"], "result"
@@ -116,8 +118,19 @@ def run_build_and_search(
             ]
             if force:
                 cmd = cmd + ["--overwrite"]
-            cmd = cmd + [temp_conf_filepath]
-            subprocess.run(cmd, check=True)
+            cmd = cmd + [temp_conf_filename]
+
+            if dry_run:
+                print(
+                    "Benchmark command for %s:\n%s\n" % (algo, " ".join(cmd))
+                )
+            else:
+                try:
+                    subprocess.run(cmd, check=True)
+                except Exception as e:
+                    print("Error occurred running benchmark: %s" % e)
+                finally:
+                    os.remove(temp_conf_filename)
 
         if search:
             search_folder = os.path.join(legacy_result_folder, "search")
@@ -141,10 +154,18 @@ def run_build_and_search(
             if search_threads:
                 cmd = cmd + ["--threads=%s" % search_threads]
 
-            cmd = cmd + [temp_conf_filepath]
-            subprocess.run(cmd, check=True)
-
-        os.remove(temp_conf_filepath)
+            cmd = cmd + [temp_conf_filename]
+            if dry_run:
+                print(
+                    "Benchmark command for %s:\n%s\n" % (algo, " ".join(cmd))
+                )
+            else:
+                try:
+                    subprocess.run(cmd, check=True)
+                except Exception as e:
+                    print("Error occurred running benchmark: %s" % e)
+                finally:
+                    os.remove(temp_conf_filename)
 
 
 def main():
@@ -260,6 +281,17 @@ def main():
         default=None,
     )
 
+    parser.add_argument(
+        "-r",
+        "--dry-run",
+        help="dry-run mode will convert the yaml config for the specified "
+        "algorithms and datasets to the json format that's consumed "
+        "by the lower-level c++ binaries and then print the command "
+        "to execute the benchmarks but will not actually execute "
+        "the command.",
+        action="store_true",
+    )
+
     args = parser.parse_args()
 
     # If both build and search are not provided,
@@ -271,6 +303,11 @@ def main():
         build = args.build
         search = args.search
 
+    if not args.dry_run:
+        dry_run = False
+    else:
+        dry_run = True
+
     mode = args.search_mode
     k = args.count
     batch_size = args.batch_size
@@ -452,13 +489,14 @@ def add_algo_group(group_list):
 
     run_build_and_search(
         conf_file,
-        f"{args.dataset}.json",
+        f"{args.dataset}",
         conf_filedir,
         executables_to_run,
         args.dataset_path,
         args.force,
         build,
         search,
+        dry_run,
         k,
         batch_size,
         args.search_threads,

From ca38963e4ad48cff3f8535835c6205b134621ef1 Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet" <cjnolet@gmail.com>
Date: Tue, 7 Nov 2023 15:27:06 -0500
Subject: [PATCH 2/3] Adding dry-run to docs

---
 docs/source/raft_ann_benchmarks.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/docs/source/raft_ann_benchmarks.md b/docs/source/raft_ann_benchmarks.md
index cf4da30896..135c88b615 100644
--- a/docs/source/raft_ann_benchmarks.md
+++ b/docs/source/raft_ann_benchmarks.md
@@ -152,6 +152,11 @@ options:
   -f, --force           re-run algorithms even if their results already exist (default: False)
   -m SEARCH_MODE, --search-mode SEARCH_MODE
                         run search in 'latency' (measure individual batches) or 'throughput' (pipeline batches and measure end-to-end) mode (default: throughput)
+  -t SEARCH_THREADS, --search-threads SEARCH_THREADS
+                        specify the number threads to use for throughput benchmark. Single value or a pair of min and max separated by ':'. Example --threads=1:4. Power of 2 values between 'min' and 'max' will be used. If only 'min' is
+                        specified, then a single test is run with 'min' threads. By default min=1, max=<num hyper threads>. (default: None)
+  -r, --dry-run         dry-run mode will convert the yaml config for the specified algorithms and datasets to the json format that's consumed by the lower-level c++ binaries and then print the command to run execute the benchmarks but
+                        will not actually execute the command. (default: False)
 ```
 
 `dataset`: name of the dataset to be searched in [datasets.yaml](#yaml-dataset-config)

From 9a2eff4a7cbaf0c25b7a5fb186ff8663bae9f033 Mon Sep 17 00:00:00 2001
From: "Corey J. Nolet" <cjnolet@gmail.com>
Date: Tue, 7 Nov 2023 15:45:59 -0500
Subject: [PATCH 3/3] Review changes

---
 docs/source/raft_ann_benchmarks.md                       | 2 +-
 python/raft-ann-bench/src/raft-ann-bench/run/__main__.py | 9 +++------
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/docs/source/raft_ann_benchmarks.md b/docs/source/raft_ann_benchmarks.md
index 135c88b615..24fc3801d9 100644
--- a/docs/source/raft_ann_benchmarks.md
+++ b/docs/source/raft_ann_benchmarks.md
@@ -153,7 +153,7 @@ options:
   -m SEARCH_MODE, --search-mode SEARCH_MODE
                         run search in 'latency' (measure individual batches) or 'throughput' (pipeline batches and measure end-to-end) mode (default: throughput)
   -t SEARCH_THREADS, --search-threads SEARCH_THREADS
-                        specify the number threads to use for throughput benchmark. Single value or a pair of min and max separated by ':'. Example --threads=1:4. Power of 2 values between 'min' and 'max' will be used. If only 'min' is
+                        specify the number threads to use for throughput benchmark. Single value or a pair of min and max separated by ':'. Example --search-threads=1:4. Power of 2 values between 'min' and 'max' will be used. If only 'min' is
                         specified, then a single test is run with 'min' threads. By default min=1, max=<num hyper threads>. (default: None)
   -r, --dry-run         dry-run mode will convert the yaml config for the specified algorithms and datasets to the json format that's consumed by the lower-level c++ binaries and then print the command to run execute the benchmarks but
                         will not actually execute the command. (default: False)
diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py
index 9ebbf7dbf4..6b01263c27 100644
--- a/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py
+++ b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py
@@ -89,7 +89,7 @@ def run_build_and_search(
 ):
     for executable, ann_executable_path, algo in executables_to_run.keys():
         # Need to write temporary configuration
-        temp_conf_filename = f"temporary_{conf_filename}_{uuid.uuid1()}.json"
+        temp_conf_filename = f"{conf_filename}_{algo}_{uuid.uuid1()}.json"
         with open(temp_conf_filename, "w") as f:
             temp_conf = dict()
             temp_conf["dataset"] = conf_file["dataset"]
@@ -274,7 +274,7 @@ def main():
         "--search-threads",
         help="specify the number threads to use for throughput benchmark."
         " Single value or a pair of min and max separated by ':'. "
-        "Example --threads=1:4. Power of 2 values between 'min' "
+        "Example: --search-threads=1:4. Power of 2 values between 'min' "
         "and 'max' will be used. If only 'min' is specified, then a "
         "single test is run with 'min' threads. By default min=1, "
         "max=<num hyper threads>.",
@@ -303,10 +303,7 @@ def main():
         build = args.build
         search = args.search
 
-    if not args.dry_run:
-        dry_run = False
-    else:
-        dry_run = True
+    dry_run = args.dry_run
 
     mode = args.search_mode
     k = args.count