diff --git a/python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py b/python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py index 47da9f39fa..fd6c2077e7 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py +++ b/python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py @@ -18,6 +18,7 @@ import json import os import sys +import traceback import warnings import pandas as pd @@ -58,74 +59,89 @@ def read_file(dataset, dataset_path, method): def convert_json_to_csv_build(dataset, dataset_path): for file, algo_name, df in read_file(dataset, dataset_path, "build"): - algo_name = algo_name.replace("_base", "") - df["name"] = df["name"].str.split("/").str[0] - write = pd.DataFrame( - { - "algo_name": [algo_name] * len(df), - "index_name": df["name"], - "time": df["real_time"], - } - ) - for name in df: - if name not in skip_build_cols: - write[name] = df[name] - filepath = os.path.normpath(file).split(os.sep) - filename = filepath[-1].split("-")[0] + ".csv" - write.to_csv( - os.path.join(f"{os.sep}".join(filepath[:-1]), filename), - index=False, - ) + try: + algo_name = algo_name.replace("_base", "") + df["name"] = df["name"].str.split("/").str[0] + write = pd.DataFrame( + { + "algo_name": [algo_name] * len(df), + "index_name": df["name"], + "time": df["real_time"], + } + ) + for name in df: + if name not in skip_build_cols: + write[name] = df[name] + filepath = os.path.normpath(file).split(os.sep) + filename = filepath[-1].split("-")[0] + ".csv" + write.to_csv( + os.path.join(f"{os.sep}".join(filepath[:-1]), filename), + index=False, + ) + except Exception as e: + print( + "An error occurred processing file %s (%s). Skipping..." + % (file, e) + ) + traceback.print_exc() def convert_json_to_csv_search(dataset, dataset_path): for file, algo_name, df in read_file(dataset, dataset_path, "search"): - build_file = os.path.join( - dataset_path, dataset, "result", "build", f"{algo_name}.csv" - ) - algo_name = algo_name.replace("_base", "") - df["name"] = df["name"].str.split("/").str[0] - write = pd.DataFrame( - { - "algo_name": [algo_name] * len(df), - "index_name": df["name"], - "recall": df["Recall"], - "qps": df["items_per_second"], - } - ) - for name in df: - if name not in skip_search_cols: - write[name] = df[name] - - if os.path.exists(build_file): - build_df = pd.read_csv(build_file) - write_ncols = len(write.columns) - write["build time"] = None - write["build threads"] = None - write["build cpu_time"] = None - write["build GPU"] = None - - for col_idx in range(5, len(build_df.columns)): - col_name = build_df.columns[col_idx] - write[col_name] = None - - for s_index, search_row in write.iterrows(): - for b_index, build_row in build_df.iterrows(): - if search_row["index_name"] == build_row["index_name"]: - write.iloc[s_index, write_ncols] = build_df.iloc[ - b_index, 2 - ] - write.iloc[s_index, write_ncols + 1 :] = build_df.iloc[ - b_index, 3: - ] - break - else: - warnings.warn( - f"Build CSV not found for {algo_name}, build params won't be " - "appended in the Search CSV" + try: + build_file = os.path.join( + dataset_path, dataset, "result", "build", f"{algo_name}.csv" ) - - write.to_csv(file.replace(".json", ".csv"), index=False) + algo_name = algo_name.replace("_base", "") + df["name"] = df["name"].str.split("/").str[0] + write = pd.DataFrame( + { + "algo_name": [algo_name] * len(df), + "index_name": df["name"], + "recall": df["Recall"], + "qps": df["items_per_second"], + } + ) + for name in df: + if name not in skip_search_cols: + write[name] = df[name] + + if os.path.exists(build_file): + build_df = pd.read_csv(build_file) + write_ncols = len(write.columns) + write["build time"] = None + write["build threads"] = None + write["build cpu_time"] = None + write["build GPU"] = None + + for col_idx in range(5, len(build_df.columns)): + col_name = build_df.columns[col_idx] + write[col_name] = None + + for s_index, search_row in write.iterrows(): + for b_index, build_row in build_df.iterrows(): + if search_row["index_name"] == build_row["index_name"]: + write.iloc[s_index, write_ncols] = build_df.iloc[ + b_index, 2 + ] + write.iloc[ + s_index, write_ncols + 1 : + ] = build_df.iloc[b_index, 3:] + break + else: + warnings.warn( + f"Build CSV not found for {algo_name}, " + f"build params won't be " + "appended in the Search CSV" + ) + + write.to_csv(file.replace(".json", ".csv"), index=False) + except Exception as e: + print( + "An error occurred processing file %s (%s). Skipping..." + % (file, e) + ) + traceback.print_exc() def main(): diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py index c9fde6dd7e..a33467b554 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py +++ b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py @@ -132,7 +132,8 @@ def run_build_and_search( except Exception as e: print("Error occurred running benchmark: %s" % e) finally: - os.remove(temp_conf_filename) + if not search: + os.remove(temp_conf_filename) if search: search_folder = os.path.join(legacy_result_folder, "search")