diff --git a/docs/source/raft_ann_benchmarks.md b/docs/source/raft_ann_benchmarks.md
index cf4da30896..24fc3801d9 100644
--- a/docs/source/raft_ann_benchmarks.md
+++ b/docs/source/raft_ann_benchmarks.md
@@ -152,6 +152,11 @@ options:
   -f, --force           re-run algorithms even if their results already exist (default: False)
   -m SEARCH_MODE, --search-mode SEARCH_MODE
                         run search in 'latency' (measure individual batches) or 'throughput' (pipeline batches and measure end-to-end) mode (default: throughput)
+  -t SEARCH_THREADS, --search-threads SEARCH_THREADS
+                        specify the number threads to use for throughput benchmark. Single value or a pair of min and max separated by ':'. Example --search-threads=1:4. Power of 2 values between 'min' and 'max' will be used. If only 'min' is
+                        specified, then a single test is run with 'min' threads. By default min=1, max=. (default: None)
+  -r, --dry-run         dry-run mode will convert the yaml config for the specified algorithms and datasets to the json format that's consumed by the lower-level c++ binaries and then print the command to run execute the benchmarks but
+                        will not actually execute the command. (default: False)
 ```
 
 `dataset`: name of the dataset to be searched in [datasets.yaml](#yaml-dataset-config)
diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py
index ac5d83e4c2..6b01263c27 100644
--- a/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py
+++ b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py
@@ -18,6 +18,7 @@
 import json
 import os
 import subprocess
+import uuid
 from importlib import import_module
 
 import yaml
@@ -80,6 +81,7 @@ def run_build_and_search(
     force,
     build,
     search,
+    dry_run,
     k,
     batch_size,
     search_threads,
@@ -87,16 +89,16 @@
 ):
     for executable, ann_executable_path, algo in executables_to_run.keys():
         # Need to write temporary configuration
-        temp_conf_filename = f"temporary_{conf_filename}"
-        temp_conf_filepath = os.path.join(conf_filedir, temp_conf_filename)
-        with open(temp_conf_filepath, "w") as f:
+        temp_conf_filename = f"{conf_filename}_{algo}_{uuid.uuid1()}.json"
+        with open(temp_conf_filename, "w") as f:
             temp_conf = dict()
             temp_conf["dataset"] = conf_file["dataset"]
             temp_conf["search_basic_param"] = conf_file["search_basic_param"]
             temp_conf["index"] = executables_to_run[
                 (executable, ann_executable_path, algo)
             ]["index"]
-            json.dump(temp_conf, f)
+            json_str = json.dumps(temp_conf, indent=2)
+            f.write(json_str)
         legacy_result_folder = os.path.join(
             dataset_path, conf_file["dataset"]["name"], "result"
         )
@@ -116,8 +118,19 @@
             ]
             if force:
                 cmd = cmd + ["--overwrite"]
-            cmd = cmd + [temp_conf_filepath]
-            subprocess.run(cmd, check=True)
+            cmd = cmd + [temp_conf_filename]
+
+            if dry_run:
+                print(
+                    "Benchmark command for %s:\n%s\n" % (algo, " ".join(cmd))
+                )
+            else:
+                try:
+                    subprocess.run(cmd, check=True)
+                except Exception as e:
+                    print("Error occurred running benchmark: %s" % e)
+                finally:
+                    os.remove(temp_conf_filename)
 
         if search:
             search_folder = os.path.join(legacy_result_folder, "search")
@@ -141,10 +154,18 @@
             if search_threads:
                 cmd = cmd + ["--threads=%s" % search_threads]
 
-            cmd = cmd + [temp_conf_filepath]
-            subprocess.run(cmd, check=True)
-
-            os.remove(temp_conf_filepath)
+            cmd = cmd + [temp_conf_filename]
+            if dry_run:
+                print(
+                    "Benchmark command for %s:\n%s\n" % (algo, " ".join(cmd))
+                )
+            else:
+                try:
+                    subprocess.run(cmd, check=True)
+                except Exception as e:
+                    print("Error occurred running benchmark: %s" % e)
+                finally:
+                    os.remove(temp_conf_filename)
 
 
 def main():
@@ -253,13 +274,24 @@ def main():
         "--search-threads",
         help="specify the number threads to use for throughput benchmark."
         " Single value or a pair of min and max separated by ':'. "
-        "Example --threads=1:4. Power of 2 values between 'min' "
+        "Example: --search-threads=1:4. Power of 2 values between 'min' "
        "and 'max' will be used. If only 'min' is specified, then a "
         "single test is run with 'min' threads. By default min=1, "
         "max=.",
         default=None,
     )
 
+    parser.add_argument(
+        "-r",
+        "--dry-run",
+        help="dry-run mode will convert the yaml config for the specified "
+        "algorithms and datasets to the json format that's consumed "
+        "by the lower-level c++ binaries and then print the command "
+        "to run execute the benchmarks but will not actually execute "
+        "the command.",
+        action="store_true",
+    )
+
     args = parser.parse_args()
 
     # If both build and search are not provided,
@@ -271,6 +303,8 @@ def main():
     build = args.build
     search = args.search
 
+    dry_run = args.dry_run
+
     mode = args.search_mode
     k = args.count
     batch_size = args.batch_size
@@ -452,13 +486,14 @@ def add_algo_group(group_list):
 
     run_build_and_search(
         conf_file,
-        f"{args.dataset}.json",
+        f"{args.dataset}",
         conf_filedir,
         executables_to_run,
         args.dataset_path,
         args.force,
         build,
         search,
+        dry_run,
         k,
         batch_size,
         args.search_threads,
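
Note for readers of the patch above: the key behaviour change in `run_build_and_search` is that each algorithm now gets a uniquely named temporary JSON config (via `uuid.uuid1()`), and the assembled benchmark command is either printed as-is under `--dry-run` or executed with the temporary file removed afterwards. The sketch below is a standalone illustration of that pattern only; `run_one` and its arguments are hypothetical and not part of raft-ann-bench.

```python
# Illustrative sketch of the pattern introduced by the patch; the helper
# name and arguments are hypothetical, not raft-ann-bench APIs.
import json
import os
import subprocess
import uuid


def run_one(algo, conf, base_cmd, dry_run):
    # Unique temporary config per algorithm, mirroring
    # f"{conf_filename}_{algo}_{uuid.uuid1()}.json" in the patch.
    temp_conf = f"bench_{algo}_{uuid.uuid1()}.json"
    with open(temp_conf, "w") as f:
        f.write(json.dumps(conf, indent=2))

    cmd = base_cmd + [temp_conf]
    if dry_run:
        # Dry-run only prints the command; the temp file is not removed here,
        # matching the patch (cleanup happens only on the execute path).
        print("Benchmark command for %s:\n%s\n" % (algo, " ".join(cmd)))
    else:
        try:
            subprocess.run(cmd, check=True)
        except Exception as e:
            print("Error occurred running benchmark: %s" % e)
        finally:
            os.remove(temp_conf)
```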
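The `--search-threads` help text describes a `min:max` spec that is expanded into power-of-two thread counts; that expansion happens in the lower-level C++ benchmark executable, not in this Python wrapper. Purely as an assumed illustration of the documented behaviour, the hypothetical `expand_search_threads` helper below doubles from `min` up to `max`:

```python
# Hypothetical helper: illustrates the documented "min:max" semantics of
# --search-threads; not part of raft-ann-bench (the real expansion is done
# by the C++ benchmark binary).
def expand_search_threads(spec):
    if ":" not in spec:
        # Only 'min' given: a single test with that thread count.
        return [int(spec)]
    lo, hi = (int(v) for v in spec.split(":"))
    counts, n = [], lo
    while n <= hi:
        counts.append(n)
        n *= 2  # power-of-2 steps between 'min' and 'max'
    return counts


print(expand_search_threads("1:4"))  # -> [1, 2, 4]
```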