Skip to content

Commit

Permalink
Catching conversion errors in data_export instead of fully failing (#1979)
Browse files Browse the repository at this point in the history

Authors:
  - Corey J. Nolet (https://github.com/cjnolet)

Approvers:
  - Divye Gala (https://github.com/divyegala)

URL: #1979
  • Loading branch information
cjnolet authored Nov 9, 2023
1 parent 27b23a2 commit 061c0cf
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 64 deletions.
142 changes: 79 additions & 63 deletions python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import json
import os
import sys
import traceback
import warnings

import pandas as pd
Expand Down Expand Up @@ -58,74 +59,89 @@ def read_file(dataset, dataset_path, method):

def convert_json_to_csv_build(dataset, dataset_path):
    """Convert every build-phase benchmark JSON result for *dataset* to CSV.

    Each JSON result file yielded by ``read_file`` is flattened into a
    DataFrame with ``algo_name``/``index_name``/``time`` columns (plus any
    extra columns not listed in ``skip_build_cols``) and written next to the
    source file as ``<prefix>.csv``.

    A failure converting one file is reported and skipped so a single bad
    result does not abort the whole export.

    Parameters
    ----------
    dataset : str
        Name of the dataset whose results are being exported.
    dataset_path : str
        Root directory containing the per-dataset result folders.
    """
    for file, algo_name, df in read_file(dataset, dataset_path, "build"):
        # Guard each file individually: one malformed result must not
        # abort the conversion of the remaining files.
        try:
            algo_name = algo_name.replace("_base", "")
            # Benchmark names look like "<index_name>/<params>"; keep the
            # index name only.
            df["name"] = df["name"].str.split("/").str[0]
            write = pd.DataFrame(
                {
                    "algo_name": [algo_name] * len(df),
                    "index_name": df["name"],
                    "time": df["real_time"],
                }
            )
            # Carry over any remaining columns that are not explicitly
            # excluded from the build CSV.
            for name in df:
                if name not in skip_build_cols:
                    write[name] = df[name]
            # Write "<first-token-of-filename>.csv" alongside the JSON file.
            filepath = os.path.normpath(file).split(os.sep)
            filename = filepath[-1].split("-")[0] + ".csv"
            write.to_csv(
                os.path.join(f"{os.sep}".join(filepath[:-1]), filename),
                index=False,
            )
        except Exception as e:
            # Best-effort export: report the failure (with traceback for
            # debugging) and continue with the next file.
            print(
                "An error occurred processing file %s (%s). Skipping..."
                % (file, e)
            )
            traceback.print_exc()


def convert_json_to_csv_search(dataset, dataset_path):
    """Convert every search-phase benchmark JSON result for *dataset* to CSV.

    Each JSON result file yielded by ``read_file`` is flattened into a
    DataFrame with ``algo_name``/``index_name``/``recall``/``qps`` columns
    (plus any extra columns not listed in ``skip_search_cols``).  When the
    matching build CSV exists, its per-index build metrics are joined onto
    the search rows by ``index_name``; otherwise a warning is issued and the
    search CSV is written without build parameters.

    A failure converting one file is reported and skipped so a single bad
    result does not abort the whole export.

    Parameters
    ----------
    dataset : str
        Name of the dataset whose results are being exported.
    dataset_path : str
        Root directory containing the per-dataset result folders.
    """
    for file, algo_name, df in read_file(dataset, dataset_path, "search"):
        # Guard each file individually: one malformed result must not
        # abort the conversion of the remaining files.
        try:
            # Build CSV is keyed by the un-stripped algo name.
            build_file = os.path.join(
                dataset_path, dataset, "result", "build", f"{algo_name}.csv"
            )
            algo_name = algo_name.replace("_base", "")
            # Benchmark names look like "<index_name>/<params>"; keep the
            # index name only.
            df["name"] = df["name"].str.split("/").str[0]
            write = pd.DataFrame(
                {
                    "algo_name": [algo_name] * len(df),
                    "index_name": df["name"],
                    "recall": df["Recall"],
                    "qps": df["items_per_second"],
                }
            )
            # Carry over any remaining columns that are not explicitly
            # excluded from the search CSV.
            for name in df:
                if name not in skip_search_cols:
                    write[name] = df[name]

            if os.path.exists(build_file):
                build_df = pd.read_csv(build_file)
                # Remember where the appended build columns start so the
                # positional writes below land in the right place.
                write_ncols = len(write.columns)
                write["build time"] = None
                write["build threads"] = None
                write["build cpu_time"] = None
                write["build GPU"] = None

                # Any build columns beyond the first five are algo-specific
                # parameters; mirror them (empty) in the search CSV.
                for col_idx in range(5, len(build_df.columns)):
                    col_name = build_df.columns[col_idx]
                    write[col_name] = None

                # Join build metrics onto each search row by index_name
                # (first match wins).
                for s_index, search_row in write.iterrows():
                    for b_index, build_row in build_df.iterrows():
                        if search_row["index_name"] == build_row["index_name"]:
                            write.iloc[s_index, write_ncols] = build_df.iloc[
                                b_index, 2
                            ]
                            write.iloc[
                                s_index, write_ncols + 1 :
                            ] = build_df.iloc[b_index, 3:]
                            break
            else:
                warnings.warn(
                    f"Build CSV not found for {algo_name}, "
                    f"build params won't be "
                    "appended in the Search CSV"
                )

            write.to_csv(file.replace(".json", ".csv"), index=False)
        except Exception as e:
            # Best-effort export: report the failure (with traceback for
            # debugging) and continue with the next file.
            print(
                "An error occurred processing file %s (%s). Skipping..."
                % (file, e)
            )
            traceback.print_exc()


def main():
Expand Down
3 changes: 2 additions & 1 deletion python/raft-ann-bench/src/raft-ann-bench/run/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,8 @@ def run_build_and_search(
except Exception as e:
print("Error occurred running benchmark: %s" % e)
finally:
os.remove(temp_conf_filename)
if not search:
os.remove(temp_conf_filename)

if search:
search_folder = os.path.join(legacy_result_folder, "search")
Expand Down

0 comments on commit 061c0cf

Please sign in to comment.