Skip to content

Commit

Permalink
more benchmark fixes
Browse files Browse the repository at this point in the history
Signed-off-by: Yu Chin Fabian Lim <[email protected]>
  • Loading branch information
fabianlim committed Jul 31, 2024
1 parent b04e2c0 commit a2d5f8b
Showing 1 changed file with 25 additions and 6 deletions.
31 changes: 25 additions & 6 deletions scripts/benchmarks/compare_with_reference.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@

DEFAULT_REFERENCE_FILEPATH = "scripts/benchmarks/refs/a100_80gb.csv"
BENCHMARK_FILENAME = "benchmarks.csv"
RAW_FILENAME = "raw_summary.csv"
OUTLIERS_FILENAME = "outliers.csv"

def plot_chart(ax, x, y, title, xlabel, ylabel):
Expand Down Expand Up @@ -82,8 +83,11 @@ def compare_results(df, ref, plot_columns, threshold_ratio=0.1):
return outliers_df, outliers, charts


def read_df(file_path, indices, plot_columns):
df = pd.read_csv(file_path)
def read_df(file_path_or_dataframe, indices, plot_columns):
if isinstance(file_path_or_dataframe, str):
df = pd.read_csv(file_path_or_dataframe)
else:
df = file_path_or_dataframe
df.set_index(indices, inplace=True)
# all remaining columns (neither plotted nor explicitly ignored) are treated as hyperparameters
argument_columns = [
Expand All @@ -98,10 +102,25 @@ def main(
result_dir, reference_benchmark_filepath, plot_columns, threshold_ratio, indices
):
ref, args_ref = read_df(reference_benchmark_filepath, indices, plot_columns)
new_benchmark_filepath = os.path.join(result_dir, BENCHMARK_FILENAME)
df, args_df = read_df(
new_benchmark_filepath, indices, plot_columns
)

# NOTE: this is a bit of a hack. If the new bench is a smaller (partial) bench,
# we supplement its data with the columns from the raw summary.
new_benchmark_filepath = os.path.join(result_dir, BENCHMARK_FILENAME)
try:
df, args_df = read_df(new_benchmark_filepath, indices, plot_columns)
except KeyError:
raw_filepath = os.path.join(result_dir, RAW_FILENAME)
print (
f"New '{new_benchmark_filepath}' is probably a partial bench. Supplementing "
f"missing columns from raw data '{raw_filepath}'."
)
df2 = pd.read_csv(new_benchmark_filepath)
df = pd.read_csv(raw_filepath)
df, args_df = read_df(
pd.concat([df, df2[[x for x in df2.columns if x not in df.columns]]], axis=1),
indices, plot_columns
)

# Analyse between both sets of results and retrieve outliers
# - this has a side effect of plotting the charts
outliers_df, outliers, charts = compare_results(
Expand Down

0 comments on commit a2d5f8b

Please sign in to comment.