From f73b0efed3138925a06f357bb6d21aaf7ac1bedf Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Tue, 7 Nov 2023 15:25:01 -0500 Subject: [PATCH 1/3] Adding dry-run option to raft-ann-bench --- .../src/raft-ann-bench/run/__main__.py | 60 +++++++++++++++---- 1 file changed, 49 insertions(+), 11 deletions(-) diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py index ac5d83e4c2..9ebbf7dbf4 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py +++ b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py @@ -18,6 +18,7 @@ import json import os import subprocess +import uuid from importlib import import_module import yaml @@ -80,6 +81,7 @@ def run_build_and_search( force, build, search, + dry_run, k, batch_size, search_threads, @@ -87,16 +89,16 @@ def run_build_and_search( ): for executable, ann_executable_path, algo in executables_to_run.keys(): # Need to write temporary configuration - temp_conf_filename = f"temporary_{conf_filename}" - temp_conf_filepath = os.path.join(conf_filedir, temp_conf_filename) - with open(temp_conf_filepath, "w") as f: + temp_conf_filename = f"temporary_{conf_filename}_{uuid.uuid1()}.json" + with open(temp_conf_filename, "w") as f: temp_conf = dict() temp_conf["dataset"] = conf_file["dataset"] temp_conf["search_basic_param"] = conf_file["search_basic_param"] temp_conf["index"] = executables_to_run[ (executable, ann_executable_path, algo) ]["index"] - json.dump(temp_conf, f) + json_str = json.dumps(temp_conf, indent=2) + f.write(json_str) legacy_result_folder = os.path.join( dataset_path, conf_file["dataset"]["name"], "result" @@ -116,8 +118,19 @@ def run_build_and_search( ] if force: cmd = cmd + ["--overwrite"] - cmd = cmd + [temp_conf_filepath] - subprocess.run(cmd, check=True) + cmd = cmd + [temp_conf_filename] + + if dry_run: + print( + "Benchmark command for %s:\n%s\n" % (algo, " ".join(cmd)) + ) + else: + try: + subprocess.run(cmd, check=True) + except Exception as e: + print("Error occurred running benchmark: %s" % e) + finally: + os.remove(temp_conf_filename) if search: search_folder = os.path.join(legacy_result_folder, "search") @@ -141,10 +154,18 @@ def run_build_and_search( if search_threads: cmd = cmd + ["--threads=%s" % search_threads] - cmd = cmd + [temp_conf_filepath] - subprocess.run(cmd, check=True) - - os.remove(temp_conf_filepath) + cmd = cmd + [temp_conf_filename] + if dry_run: + print( + "Benchmark command for %s:\n%s\n" % (algo, " ".join(cmd)) + ) + else: + try: + subprocess.run(cmd, check=True) + except Exception as e: + print("Error occurred running benchmark: %s" % e) + finally: + os.remove(temp_conf_filename) def main(): @@ -260,6 +281,17 @@ def main(): default=None, ) + parser.add_argument( + "-r", + "--dry-run", + help="dry-run mode will convert the yaml config for the specified " + "algorithms and datasets to the json format that's consumed " + "by the lower-level c++ binaries and then print the command " + "to run execute the benchmarks but will not actually execute " + "the command.", + action="store_true", + ) + args = parser.parse_args() # If both build and search are not provided, @@ -271,6 +303,11 @@ def main(): build = args.build search = args.search + if not args.dry_run: + dry_run = False + else: + dry_run = True + mode = args.search_mode k = args.count batch_size = args.batch_size @@ -452,13 +489,14 @@ def add_algo_group(group_list): run_build_and_search( conf_file, - f"{args.dataset}.json", + f"{args.dataset}", conf_filedir, executables_to_run, args.dataset_path, args.force, build, search, + dry_run, k, batch_size, args.search_threads, From ca38963e4ad48cff3f8535835c6205b134621ef1 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Tue, 7 Nov 2023 15:27:06 -0500 Subject: [PATCH 2/3] Adding dry-run to docs --- docs/source/raft_ann_benchmarks.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/source/raft_ann_benchmarks.md b/docs/source/raft_ann_benchmarks.md index cf4da30896..135c88b615 100644 --- a/docs/source/raft_ann_benchmarks.md +++ b/docs/source/raft_ann_benchmarks.md @@ -152,6 +152,11 @@ options: -f, --force re-run algorithms even if their results already exist (default: False) -m SEARCH_MODE, --search-mode SEARCH_MODE run search in 'latency' (measure individual batches) or 'throughput' (pipeline batches and measure end-to-end) mode (default: throughput) + -t SEARCH_THREADS, --search-threads SEARCH_THREADS + specify the number threads to use for throughput benchmark. Single value or a pair of min and max separated by ':'. Example --threads=1:4. Power of 2 values between 'min' and 'max' will be used. If only 'min' is + specified, then a single test is run with 'min' threads. By default min=1, max=. (default: None) + -r, --dry-run dry-run mode will convert the yaml config for the specified algorithms and datasets to the json format that's consumed by the lower-level c++ binaries and then print the command to run execute the benchmarks but + will not actually execute the command. (default: False) ``` `dataset`: name of the dataset to be searched in [datasets.yaml](#yaml-dataset-config) From 9a2eff4a7cbaf0c25b7a5fb186ff8663bae9f033 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Tue, 7 Nov 2023 15:45:59 -0500 Subject: [PATCH 3/3] Review changes --- docs/source/raft_ann_benchmarks.md | 2 +- python/raft-ann-bench/src/raft-ann-bench/run/__main__.py | 9 +++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/docs/source/raft_ann_benchmarks.md b/docs/source/raft_ann_benchmarks.md index 135c88b615..24fc3801d9 100644 --- a/docs/source/raft_ann_benchmarks.md +++ b/docs/source/raft_ann_benchmarks.md @@ -153,7 +153,7 @@ options: -m SEARCH_MODE, --search-mode SEARCH_MODE run search in 'latency' (measure individual batches) or 'throughput' (pipeline batches and measure end-to-end) mode (default: throughput) -t SEARCH_THREADS, --search-threads SEARCH_THREADS - specify the number threads to use for throughput benchmark. Single value or a pair of min and max separated by ':'. Example --threads=1:4. Power of 2 values between 'min' and 'max' will be used. If only 'min' is + specify the number threads to use for throughput benchmark. Single value or a pair of min and max separated by ':'. Example --search-threads=1:4. Power of 2 values between 'min' and 'max' will be used. If only 'min' is specified, then a single test is run with 'min' threads. By default min=1, max=. (default: None) -r, --dry-run dry-run mode will convert the yaml config for the specified algorithms and datasets to the json format that's consumed by the lower-level c++ binaries and then print the command to run execute the benchmarks but will not actually execute the command. (default: False) diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py index 9ebbf7dbf4..6b01263c27 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py +++ b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py @@ -89,7 +89,7 @@ def run_build_and_search( ): for executable, ann_executable_path, algo in executables_to_run.keys(): # Need to write temporary configuration - temp_conf_filename = f"temporary_{conf_filename}_{uuid.uuid1()}.json" + temp_conf_filename = f"{conf_filename}_{algo}_{uuid.uuid1()}.json" with open(temp_conf_filename, "w") as f: temp_conf = dict() temp_conf["dataset"] = conf_file["dataset"] @@ -274,7 +274,7 @@ def main(): "--search-threads", help="specify the number threads to use for throughput benchmark." " Single value or a pair of min and max separated by ':'. " - "Example --threads=1:4. Power of 2 values between 'min' " + "Example: --search-threads=1:4. Power of 2 values between 'min' " "and 'max' will be used. If only 'min' is specified, then a " "single test is run with 'min' threads. By default min=1, " "max=.", @@ -303,10 +303,7 @@ def main(): build = args.build search = args.search - if not args.dry_run: - dry_run = False - else: - dry_run = True + dry_run = args.dry_run mode = args.search_mode k = args.count