Added plot creator
ancestor-mithril committed May 20, 2024
1 parent 091b56f commit 34527cc
Showing 1 changed file with 274 additions and 0 deletions.
274 changes: 274 additions & 0 deletions plot_creator.py
@@ -0,0 +1,274 @@
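# Builds comparison plots and a LaTeX results table from TensorBoard event logs,
# pairing each batch-size-scheduler run with its learning-rate-scheduler counterpart.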
import glob
import os
import warnings

import seaborn as sns
import pandas as pd
from matplotlib import pyplot as plt
from tensorboard.backend.event_processing import event_accumulator
from tqdm import tqdm


def get_tensorboard_scalars(path):
    ea = event_accumulator.EventAccumulator(
        path,
        size_guidance={event_accumulator.SCALARS: 0},  # 0 = keep every scalar event, no downsampling
    )
    ea.Reload()
    return {scalar: tuple(x.value for x in ea.Scalars(scalar)) for scalar in ea.Tags()["scalars"]}


def parse_dataset_name(path):
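    # Run layout (inferred from the splits below): the sixth path segment is
    # 'epochs_<n>' and the seventh is the dataset name; deeper segments contain
    # 'scheduler_<name>', 'scheduler_params_<params>' and 'bs_<batch_size>'.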
    assert path.split(os.path.sep)[5].startswith('epochs_'), str(path.split(os.path.sep)[5:7])
    return path.split(os.path.sep)[6]


def parse_scheduler_type(path):
    scheduler = path.split(os.path.sep + 'scheduler_')[1].split(os.path.sep)[0]
    scheduler_params = path.split(os.path.sep + 'scheduler_params_')[1].split(os.path.sep)[0]
    bs = path.split(os.path.sep + 'bs_')[1].split(os.path.sep)[0]
    return scheduler, scheduler_params, bs


def match_paths_by_criteria(tb_paths):
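    # Pairs each batch-size scheduler run with its learning-rate counterpart,
    # e.g. StepBS with gamma 2.0 (doubling BS) matches StepLR with gamma 0.5
    # (halving LR). The rules are made symmetric below, so matching works in
    # both directions.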
    match_rules = {
        ('CosineAnnealingBS', 'total_iters_100_max_batch_size_1000_'):
            ('CosineAnnealingLR', 'T_max_100_'),
        ('CosineAnnealingBS', 'total_iters_200_max_batch_size_1000_'):
            ('CosineAnnealingLR', 'T_max_200_'),
        ('CosineAnnealingBS', 'total_iters_50_max_batch_size_1000_'):
            ('CosineAnnealingLR', 'T_max_50_'),

        ('CosineAnnealingBSWithWarmRestarts', 't_0_100_factor_1_max_batch_size_1000_'):
            ('CosineAnnealingWarmRestarts', 'T_0_100_T_mult_1_'),
        ('CosineAnnealingBSWithWarmRestarts', 't_0_50_factor_1_max_batch_size_1000_'):
            ('CosineAnnealingWarmRestarts', 'T_0_50_T_mult_1_'),

        ('ExponentialBS', 'gamma_1.1_max_batch_size_1000_'):
            ('ExponentialLR', 'gamma_0.9_'),
        ('ExponentialBS', 'gamma_2_max_batch_size_1000_'):
            ('ExponentialLR', 'gamma_0.5_'),

        ('IncreaseBSOnPlateau', 'mode_min_factor_2.0_max_batch_size_1000_'):
            ('ReduceLROnPlateau', 'mode_min_factor_0.5_'),
        ('IncreaseBSOnPlateau', 'mode_min_factor_5.0_max_batch_size_1000_'):
            ('ReduceLROnPlateau', 'mode_min_factor_0.2_'),

        ('PolynomialBS', 'total_iters_200_max_batch_size_1000_'):
            ('PolynomialLR', 'total_iters_200_'),

        ('StepBS', 'step_size_30_gamma_2.0_max_batch_size_1000_'):
            ('StepLR', 'step_size_30_gamma_0.5_'),
        ('StepBS', 'step_size_50_gamma_2.0_max_batch_size_1000_'):
            ('StepLR', 'step_size_50_gamma_0.5_'),
        ('StepBS', 'step_size_30_gamma_5.0_max_batch_size_1000_'):
            ('StepLR', 'step_size_30_gamma_0.2_'),
        ('StepBS', 'step_size_50_gamma_5.0_max_batch_size_1000_'):
            ('StepLR', 'step_size_50_gamma_0.2_'),
    }
    match_rules.update({v: k for k, v in match_rules.items()})  # reversed rules

    groups = []
    while tb_paths:
        current_path = tb_paths.pop(0)
        dataset = parse_dataset_name(current_path)
        scheduler, scheduler_param, initial_batch_size = parse_scheduler_type(current_path)

        def is_ok(other_path):
            other_dataset = parse_dataset_name(other_path)
            other_scheduler, other_scheduler_param, other_initial_batch_size = parse_scheduler_type(other_path)
            if other_dataset != dataset or other_initial_batch_size != initial_batch_size:
                return False
            if match_rules[(scheduler, scheduler_param)] != (other_scheduler, other_scheduler_param):
                return False
            return True

        matching = [x for x in tb_paths if is_ok(x)]

        if not matching:
            print("No matching for", current_path)

        for x in matching:
            tb_paths.remove(x)

        groups.append((current_path, *matching))

    return groups


def parse_tensorboard(path):
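    # Summarizes one run: peak accuracies, total wall-clock time (seconds -> hours),
    # and per-epoch accuracy, batch-size and learning-rate curves. Accuracies are
    # logged as percentages and rescaled to [0, 1].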
    scalars = get_tensorboard_scalars(path)
    dataset = parse_dataset_name(path)
    scheduler, scheduler_param, initial_batch_size = parse_scheduler_type(path)

    epoch = len(scalars['Train/Time'])
    experiment_time = round(scalars['Train/Time'][epoch - 1] / 3600, 2)
    max_train_accuracy = round(max(scalars['Train/Accuracy'][:epoch]), 2)
    max_val_accuracy = round(max(scalars['Val/Accuracy'][:epoch]), 2)
    initial_learning_rate = round(scalars['Trainer/Learning Rate'][0], 2)
    return {
        'dataset': dataset,
        'scheduler': scheduler,
        'scheduler_param': scheduler_param,
        'experiment_time': experiment_time,
        'experiment_times': scalars['Train/Time'][:epoch],
        'train_epochs': epoch,
        'max_train_accuracy': max_train_accuracy,
        'max_val_accuracy': max_val_accuracy,
        'initial_batch_size': initial_batch_size,
        'initial_learning_rate': initial_learning_rate,
        'train_accuracies': tuple(x / 100 for x in scalars['Train/Accuracy'][:epoch]),
        'val_accuracies': tuple(x / 100 for x in scalars['Val/Accuracy'][:epoch]),
        'batch_sizes': scalars['Trainer/Batch Size'][:epoch],
        'learning_rates': scalars['Trainer/Learning Rate'][:epoch],
    }


def create_tex(group_results, results_dir):
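    # Appends one LaTeX table row per experiment; times are compared at the
    # shortest run length within the group so they cover the same epochs.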
    scheduler_acronym = {
        'IncreaseBSOnPlateau': 'IBS',
        'ReduceLROnPlateau': 'RLR',
        'StepLR': 'StepLR',
        'StepBS': 'StepBS',
        'CosineAnnealingBS': 'CosBS',
        'CosineAnnealingLR': 'CosLR',
        'CosineAnnealingBSWithWarmRestarts': 'CosWwrBS',
        'CosineAnnealingWarmRestarts': 'CosWwrLR',
        'ExponentialBS': 'ExpBS',
        'ExponentialLR': 'ExpLR',
        'PolynomialBS': 'PolyBS',
        'PolynomialLR': 'PolyLR',
    }

    tex_file = os.path.join(results_dir, 'results_table.txt')
    if not os.path.exists(tex_file):
        with open(tex_file, 'w') as f:
            f.write(
                r'\begin{table}[]''\n'
                r'\resizebox{\textwidth}{!}{''\n'
                r'\begin{tabular}{|c|ccc|cc|c|}''\n'
                r'\hline''\n'
                r'Dataset & Scheduler & First LR & First BS & Train Acc. & Val Acc. & Time (h) \\ \hline''\n'
            )

    length = min(x['train_epochs'] for x in group_results)

    with open(tex_file, 'a') as f:
        for result in group_results:
            scheduler_name = scheduler_acronym[result['scheduler']]
            experiment_time = round(result['experiment_times'][length - 1] / 3600, 2)
            f.write(
                f"{result['dataset']} & "
                f"{scheduler_name}({result['scheduler_param'].replace('_', ',')}) & "
                f"{result['initial_learning_rate']} & "
                f"{result['initial_batch_size']} & "
                f"{result['max_train_accuracy']} & "
                f"{result['max_val_accuracy']} & "
                f"{experiment_time:.2f}"
                r'\\'
                '\n'
            )
        f.write(r'\hline''\n')


def create_graphics(group_results, results_dir):
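    # Emits two figures per matched pair: train/val accuracy curves ('_first.png')
    # and the batch-size/learning-rate schedules ('_second.png').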
    exp_1, exp_2 = group_results

    colors = ['darkred', 'royalblue', 'orange']
    with sns.axes_style("whitegrid"):
        fig, axes = plt.subplots(1, 2, figsize=(13, 7.8 / 2))

        length = min(exp_1['train_epochs'], exp_2['train_epochs'])

        def create_df(exp):
            def pad_tuple(x):
                size = length - len(x)
                if size == 0:
                    return x
                if size > 0:
                    # a series shorter than the common length should not occur
                    warnings.warn('Problem here')
                    return x + (x[-1],) * size  # pad by repeating the last value
                return x[:size]  # negative size truncates to the common length

            return pd.DataFrame.from_dict({
                'epoch': tuple(range(length)),
                'Train Acc.': pad_tuple(exp['train_accuracies']),
                'Val Acc.': pad_tuple(exp['val_accuracies']),
                'Batch Size': pad_tuple(exp['batch_sizes']),
                'Learning Rate': pad_tuple(exp['learning_rates']),
            })

        df_1 = create_df(exp_1)
        df_2 = create_df(exp_2)

        # Train
        sns.lineplot(x="epoch", y="Train Acc.", data=df_1, linewidth=1,
                     color=colors[0], linestyle='-', alpha=0.7, ax=axes[0],
                     label=f"{exp_1['scheduler']}({exp_1['scheduler_param']})")
        sns.lineplot(x="epoch", y="Train Acc.", data=df_2, linewidth=1,
                     color=colors[1], linestyle='-', alpha=0.7, ax=axes[0],
                     label=f"{exp_2['scheduler']}({exp_2['scheduler_param']})")
        axes[0].set_ylim(0.0, 1.1)
        sns.move_legend(axes[0], "upper left", bbox_to_anchor=(0.85, 1.2))

        # Val
        sns.lineplot(x="epoch", y="Val Acc.", data=df_1, linewidth=1,
                     color=colors[0], linestyle='-', alpha=0.7, ax=axes[1])
        sns.lineplot(x="epoch", y="Val Acc.", data=df_2, linewidth=1,
                     color=colors[1], linestyle='-', alpha=0.7, ax=axes[1])
        axes[1].set_ylim(0.0, 1.1)

        plt.savefig(os.path.join(results_dir, 'plots', f"{exp_1['scheduler']}_"
                                                       f"{exp_1['dataset']}_{exp_1['initial_batch_size']}_"
                                                       f"{exp_1['scheduler_param']}_"
                                                       f"{exp_1['initial_learning_rate']}_first.png"),
                    bbox_inches='tight')
        plt.close()

        fig, axes = plt.subplots(1, 2, figsize=(13, 7.8 / 2))
        # BS
        sns.lineplot(x="epoch", y="Batch Size", data=df_1, linewidth=1.5,
                     color=colors[0], linestyle='-', alpha=0.7, ax=axes[0])
        sns.lineplot(x="epoch", y="Batch Size", data=df_2, linewidth=1.5,
                     color=colors[1], linestyle='-', alpha=0.7, ax=axes[0])

        # LR
        sns.lineplot(x="epoch", y="Learning Rate", data=df_1, linewidth=1.5,
                     color=colors[0], linestyle='-', alpha=0.7, ax=axes[1])
        sns.lineplot(x="epoch", y="Learning Rate", data=df_2, linewidth=1.5,
                     color=colors[1], linestyle='-', alpha=0.7, ax=axes[1])

        plt.savefig(os.path.join(results_dir, 'plots', f"{exp_1['scheduler']}_"
                                                       f"{exp_1['dataset']}_{exp_1['initial_batch_size']}_"
                                                       f"{exp_1['scheduler_param']}_"
                                                       f"{exp_1['initial_learning_rate']}_second.png"),
                    bbox_inches='tight')
        plt.close()


def get_tensorboard_paths(base_dir):
    return glob.glob(f"{base_dir}/**/events.out.tfevents*", recursive=True)


def main(base_dir, results_dir):
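    # Pipeline: collect event files, group matched runs, then write table rows
    # and plots per group before closing the LaTeX table.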
    os.makedirs(os.path.join(results_dir, 'plots'), exist_ok=True)
    tex_file = os.path.join(results_dir, 'results_table.txt')
    tb_paths = get_tensorboard_paths(base_dir)

    if os.path.exists(tex_file):
        os.remove(tex_file)  # start from a fresh table

    for group in tqdm(match_paths_by_criteria(tb_paths)):
        group_results = [parse_tensorboard(x) for x in group]
        create_tex(group_results, results_dir)
        create_graphics(group_results, results_dir)

    with open(tex_file, 'a') as f:
        f.write(
            r'\end{tabular}''\n'
            r'}''\n'
            r'\end{table}''\n'
        )


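# Assumes TensorBoard runs live under ./all_runs; plots and results_table.txt
# are written to the 'Graphics' directory.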
if __name__ == '__main__':
    main('./all_runs', 'Graphics')
