From 0d28f738590ce855a474a5049d0dda77dc8ced5d Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 8 Nov 2023 13:41:50 -0500 Subject: [PATCH 1/3] Catching conversion errors in data_export instead of fully failing --- .../raft-ann-bench/data_export/__main__.py | 142 ++++++++++-------- 1 file changed, 79 insertions(+), 63 deletions(-) diff --git a/python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py b/python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py index e19ada2934..82ebd1ac67 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py +++ b/python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py @@ -17,6 +17,7 @@ import argparse import json import os +import traceback import warnings import pandas as pd @@ -57,74 +58,89 @@ def read_file(dataset, dataset_path, method): def convert_json_to_csv_build(dataset, dataset_path): for file, algo_name, df in read_file(dataset, dataset_path, "build"): - algo_name = algo_name.replace("_base", "") - df["name"] = df["name"].str.split("/").str[0] - write = pd.DataFrame( - { - "algo_name": [algo_name] * len(df), - "index_name": df["name"], - "time": df["real_time"], - } - ) - for name in df: - if name not in skip_build_cols: - write[name] = df[name] - filepath = os.path.normpath(file).split(os.sep) - filename = filepath[-1].split("-")[0] + ".csv" - write.to_csv( - os.path.join(f"{os.sep}".join(filepath[:-1]), filename), - index=False, - ) + try: + algo_name = algo_name.replace("_base", "") + df["name"] = df["name"].str.split("/").str[0] + write = pd.DataFrame( + { + "algo_name": [algo_name] * len(df), + "index_name": df["name"], + "time": df["real_time"], + } + ) + for name in df: + if name not in skip_build_cols: + write[name] = df[name] + filepath = os.path.normpath(file).split(os.sep) + filename = filepath[-1].split("-")[0] + ".csv" + write.to_csv( + os.path.join(f"{os.sep}".join(filepath[:-1]), filename), + index=False, + ) + except Exception as e: + print( + "An 
error occurred processing file %s (%s). Skipping..." + % (file, e) + ) + traceback.print_exc() def convert_json_to_csv_search(dataset, dataset_path): for file, algo_name, df in read_file(dataset, dataset_path, "search"): - build_file = os.path.join( - dataset_path, dataset, "result", "build", f"{algo_name}.csv" - ) - algo_name = algo_name.replace("_base", "") - df["name"] = df["name"].str.split("/").str[0] - write = pd.DataFrame( - { - "algo_name": [algo_name] * len(df), - "index_name": df["name"], - "recall": df["Recall"], - "qps": df["items_per_second"], - } - ) - for name in df: - if name not in skip_search_cols: - write[name] = df[name] - - if os.path.exists(build_file): - build_df = pd.read_csv(build_file) - write_ncols = len(write.columns) - write["build time"] = None - write["build threads"] = None - write["build cpu_time"] = None - write["build GPU"] = None - - for col_idx in range(5, len(build_df.columns)): - col_name = build_df.columns[col_idx] - write[col_name] = None - - for s_index, search_row in write.iterrows(): - for b_index, build_row in build_df.iterrows(): - if search_row["index_name"] == build_row["index_name"]: - write.iloc[s_index, write_ncols] = build_df.iloc[ - b_index, 2 - ] - write.iloc[s_index, write_ncols + 1 :] = build_df.iloc[ - b_index, 3: - ] - break - else: - warnings.warn( - f"Build CSV not found for {algo_name}, build params won't be " - "appended in the Search CSV" + try: + build_file = os.path.join( + dataset_path, dataset, "result", "build", f"{algo_name}.csv" ) - - write.to_csv(file.replace(".json", ".csv"), index=False) + algo_name = algo_name.replace("_base", "") + df["name"] = df["name"].str.split("/").str[0] + write = pd.DataFrame( + { + "algo_name": [algo_name] * len(df), + "index_name": df["name"], + "recall": df["Recall"], + "qps": df["items_per_second"], + } + ) + for name in df: + if name not in skip_search_cols: + write[name] = df[name] + + if os.path.exists(build_file): + build_df = pd.read_csv(build_file) + 
write_ncols = len(write.columns) + write["build time"] = None + write["build threads"] = None + write["build cpu_time"] = None + write["build GPU"] = None + + for col_idx in range(5, len(build_df.columns)): + col_name = build_df.columns[col_idx] + write[col_name] = None + + for s_index, search_row in write.iterrows(): + for b_index, build_row in build_df.iterrows(): + if search_row["index_name"] == build_row["index_name"]: + write.iloc[s_index, write_ncols] = build_df.iloc[ + b_index, 2 + ] + write.iloc[ + s_index, write_ncols + 1 : + ] = build_df.iloc[b_index, 3:] + break + else: + warnings.warn( + f"Build CSV not found for {algo_name}, " + f"build params won't be " + "appended in the Search CSV" + ) + + write.to_csv(file.replace(".json", ".csv"), index=False) + except Exception as e: + print( + "An error occurred processing file %s (%s). Skipping..." + % (file, e) + ) + traceback.print_exc() def main(): From a72d9ca76d8faf20a6dd5cb525d9af41ff1f8b1e Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 8 Nov 2023 14:26:03 -0500 Subject: [PATCH 2/3] Don't remove build json when it fails if we have to search next --- python/raft-ann-bench/src/raft-ann-bench/run/__main__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py index 6b01263c27..73a2d226dc 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py +++ b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py @@ -130,7 +130,8 @@ def run_build_and_search( except Exception as e: print("Error occurred running benchmark: %s" % e) finally: - os.remove(temp_conf_filename) + if not search: + os.remove(temp_conf_filename) if search: search_folder = os.path.join(legacy_result_folder, "search") From ee6c0bf2bc25f7f27b497fadeb8696a78cc03d11 Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Wed, 8 Nov 2023 17:46:34 -0500 Subject: [PATCH 3/3] Fixing style --- .../raft-ann-bench/src/raft-ann-bench/data_export/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py b/python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py index ced6e475cd..fd6c2077e7 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py +++ b/python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py @@ -17,8 +17,8 @@ import argparse import json import os -import traceback import sys +import traceback import warnings import pandas as pd