From b5ad4f877f24e4ded029cae013243d78ac756766 Mon Sep 17 00:00:00 2001 From: divyegala Date: Tue, 7 Nov 2023 14:47:13 -0800 Subject: [PATCH 1/5] add more fields to data exporter --- .../raft-ann-bench/data_export/__main__.py | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py b/python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py index dd338c0c45..9d5fbb1675 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py +++ b/python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py @@ -20,6 +20,15 @@ import pandas as pd +skip_build_cols = set([ + "algo_name", "index_name", "time", "name", "family_index", + "per_family_instance_index", "run_name", "run_type", "repetitions", + "repetition_index", "iterations", "real_time", "cpu_time", "time_unit", + "index_size"]) + +skip_search_cols = set([ + "recall", "qps", "items_per_second", "Recall" +]) | skip_build_cols def read_file(dataset, dataset_path, method): dir = os.path.join(dataset_path, dataset, "result", method) @@ -42,6 +51,9 @@ def convert_json_to_csv_build(dataset, dataset_path): "time": df["real_time"], } ) + for name in df: + if name not in skip_build_cols: + write[name] = df[name] filepath = os.path.normpath(file).split(os.sep) filename = filepath[-1].split("-")[0] + ".csv" write.to_csv( @@ -52,6 +64,9 @@ def convert_json_to_csv_build(dataset, dataset_path): def convert_json_to_csv_search(dataset, dataset_path): for file, algo_name, df in read_file(dataset, dataset_path, "search"): + build_file = os.path.join( + dataset_path, dataset, "result", "build", f"{algo_name}.csv" + ) algo_name = algo_name.replace("_base", "") df["name"] = df["name"].str.split("/").str[0] write = pd.DataFrame( @@ -62,6 +77,28 @@ def convert_json_to_csv_search(dataset, dataset_path): "qps": df["items_per_second"], } ) + for name in df: + if name not in skip_search_cols: + write[name] = df[name] + print(build_file) + if os.path.exists( + build_file + ): + with open(build_file, "r") as f: + build_df = pd.read_csv(build_file) + write_n_cols = len(write.columns) + write["build GPU"] = None + write["build threads"] = None + for col_idx in range(5, len(build_df.columns)): + col_name = build_df.columns[col_idx] + write[col_name] = None + print(write.head()) + for s_index, search_row in write.iterrows(): + for b_index, build_row in build_df.iterrows(): + if search_row["index_name"] == build_row["index_name"]: + write.iloc[s_index, write_n_cols:] = build_df.iloc[b_index, 3:] + break + write.to_csv(file.replace(".json", ".csv"), index=False) From fc0d1b1fec549533d18dd49c90682f375c5eb817 Mon Sep 17 00:00:00 2001 From: divyegala Date: Tue, 7 Nov 2023 14:51:19 -0800 Subject: [PATCH 2/5] style fix --- .../raft-ann-bench/data_export/__main__.py | 42 ++++++++++++------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py b/python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py index 9d5fbb1675..1d5500168d 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py +++ b/python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py @@ -20,15 +20,30 @@ import pandas as pd -skip_build_cols = set([ - "algo_name", "index_name", "time", "name", "family_index", - "per_family_instance_index", "run_name", "run_type", "repetitions", - "repetition_index", "iterations", "real_time", "cpu_time", "time_unit", - "index_size"]) +skip_build_cols = set( + [ + "algo_name", + "index_name", + "time", + "name", + "family_index", + "per_family_instance_index", + "run_name", + "run_type", + "repetitions", + "repetition_index", + "iterations", + "real_time", + "cpu_time", + "time_unit", + "index_size", + ] +) + +skip_search_cols = ( + set(["recall", "qps", "items_per_second", "Recall"]) | skip_build_cols +) -skip_search_cols = set([ - "recall", "qps", "items_per_second", "Recall" -]) | skip_build_cols def read_file(dataset, dataset_path, method): dir = os.path.join(dataset_path, dataset, "result", method) @@ -81,11 +96,8 @@ def convert_json_to_csv_search(dataset, dataset_path): if name not in skip_search_cols: write[name] = df[name] print(build_file) - if os.path.exists( - build_file - ): - with open(build_file, "r") as f: - build_df = pd.read_csv(build_file) + if os.path.exists(build_file): + build_df = pd.read_csv(build_file) write_n_cols = len(write.columns) write["build GPU"] = None write["build threads"] = None @@ -96,7 +108,9 @@ def convert_json_to_csv_search(dataset, dataset_path): for s_index, search_row in write.iterrows(): for b_index, build_row in build_df.iterrows(): if search_row["index_name"] == build_row["index_name"]: - write.iloc[s_index, write_n_cols:] = build_df.iloc[b_index, 3:] + write.iloc[s_index, write_n_cols:] = build_df.iloc[ + b_index, 3: + ] break write.to_csv(file.replace(".json", ".csv"), index=False) From 6e175d3c976e2c88568c5ea0139793dd1084c754 Mon Sep 17 00:00:00 2001 From: divyegala Date: Tue, 7 Nov 2023 14:52:17 -0800 Subject: [PATCH 3/5] remove prints --- .../raft-ann-bench/src/raft-ann-bench/data_export/__main__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py b/python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py index 1d5500168d..5cd7f5c731 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py +++ b/python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py @@ -95,7 +95,7 @@ def convert_json_to_csv_search(dataset, dataset_path): for name in df: if name not in skip_search_cols: write[name] = df[name] - print(build_file) + if os.path.exists(build_file): build_df = pd.read_csv(build_file) write_n_cols = len(write.columns) @@ -104,7 +104,7 @@ def convert_json_to_csv_search(dataset, dataset_path): for col_idx in range(5, len(build_df.columns)): col_name = build_df.columns[col_idx] write[col_name] = None - print(write.head()) + for s_index, search_row in write.iterrows(): for b_index, build_row in build_df.iterrows(): if search_row["index_name"] == build_row["index_name"]: From 16d619e1993500e81f9c7b0481d8d47bb0583e0c Mon Sep 17 00:00:00 2001 From: divyegala Date: Tue, 7 Nov 2023 15:13:27 -0800 Subject: [PATCH 4/5] add more params --- .../src/raft-ann-bench/data_export/__main__.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py b/python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py index 5cd7f5c731..cf28416327 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py +++ b/python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py @@ -17,6 +17,7 @@ import argparse import json import os +import warnings import pandas as pd @@ -34,7 +35,6 @@ "repetition_index", "iterations", "real_time", - "cpu_time", "time_unit", "index_size", ] @@ -98,9 +98,12 @@ def convert_json_to_csv_search(dataset, dataset_path): if os.path.exists(build_file): build_df = pd.read_csv(build_file) - write_n_cols = len(write.columns) - write["build GPU"] = None + write_ncols = len(write.columns) + write["build time"] = None write["build threads"] = None + write["build cpu_time"] = None + write["build GPU"] = None + for col_idx in range(5, len(build_df.columns)): col_name = build_df.columns[col_idx] write[col_name] = None @@ -108,10 +111,17 @@ def convert_json_to_csv_search(dataset, dataset_path): for s_index, search_row in write.iterrows(): for b_index, build_row in build_df.iterrows(): if search_row["index_name"] == build_row["index_name"]: - write.iloc[s_index, write_n_cols:] = build_df.iloc[ + write.iloc[s_index, write_ncols] = build_df.iloc[ + b_index, 2 + ] + write.iloc[s_index, write_ncols + 1:] = build_df.iloc[ b_index, 3: ] break + else: + warnings.warn( + f"Build CSV not found for {algo_name}, build params won't be " + "appended in the Search CSV") write.to_csv(file.replace(".json", ".csv"), index=False) From a195530ba899c7e4ad5adb803a0ad84fc782bdde Mon Sep 17 00:00:00 2001 From: divyegala Date: Tue, 7 Nov 2023 15:17:08 -0800 Subject: [PATCH 5/5] style fixes --- .../src/raft-ann-bench/data_export/__main__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py b/python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py index cf28416327..e19ada2934 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py +++ b/python/raft-ann-bench/src/raft-ann-bench/data_export/__main__.py @@ -114,14 +114,15 @@ def convert_json_to_csv_search(dataset, dataset_path): write.iloc[s_index, write_ncols] = build_df.iloc[ b_index, 2 ] - write.iloc[s_index, write_ncols + 1:] = build_df.iloc[ + write.iloc[s_index, write_ncols + 1 :] = build_df.iloc[ b_index, 3: ] break else: warnings.warn( f"Build CSV not found for {algo_name}, build params won't be " - "appended in the Search CSV") + "appended in the Search CSV" + ) write.to_csv(file.replace(".json", ".csv"), index=False)