import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

# Define capability benchmarks
cap_names = [
    "logiqa", "piqa", "hellaswag", "winogrande", "superglue_copa",
    "medqa_4options", "arc_challenge", "mmlu", "minerva_math",
    "lambada_openai", "gsm8k", "bbh",
]

def load_data():
    """Load the benchmark data from CSV files and return it as pandas DataFrames."""
    base_model_df = pd.read_csv('data/benchmarks_base_models.csv', index_col='model')
    chat_model_df = pd.read_csv('data/benchmarks_chat_models.csv', index_col='model')
    evals_df = pd.read_csv('data/benchmarks_info.csv', index_col='benchmark')
    return base_model_df, chat_model_df, evals_df

def find_nans(model_df):
    """Identify and print the locations of NaNs in the dataframe."""
    df_filtered = model_df.drop(columns=["model_size", "FLOP", "name", "type"])
    missing_fraction = df_filtered.isnull().mean().mean()
    print(f"Fraction missing data: {missing_fraction}")
    missing_data = df_filtered.isnull().stack()
    missing_indices = missing_data[missing_data].index
    for row, col in missing_indices:
        print(f"Missing data at row {row}, column {col}")

# Load data and print missing values
base_model_df, chat_model_df, evals_df = load_data()
find_nans(base_model_df)
find_nans(chat_model_df)
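
# For a compact per-column view of the same missingness, one could print the
# null counts directly (a sketch; reuses the metadata columns dropped inside
# find_nans):
# print(base_model_df.drop(columns=["model_size", "FLOP", "name", "type"]).isnull().sum())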


##### RUN ANALYSIS ON BASE AND CHAT MODELS #####

def run_analysis(model_df, evals_df, cap_names, label, correlation_type="spearman"):
    """
    Run normalization, compute the capabilities correlation matrix, perform PCA,
    and print the analysis results. Return the modified evals_df and model_df,
    plus the eigenvalues and correlation matrix from the PCA.

    The correlation of each safety benchmark is computed individually, dropping
    the rows that contain NaNs for that benchmark (if any).
    """
    # Normalize data
    normalized_df = model_df.drop(columns=["model_size", "FLOP", "name", "type"])
    normalized_df = normalized_df.dropna(axis=1, how='all')
    normalized_df = (normalized_df - normalized_df.mean()) / normalized_df.std(ddof=0)
    normalized_df = normalized_df.dropna(axis=1, how='all')  # in case a column had zero std dev and became NaN during normalization

    df_cap = normalized_df[cap_names].copy().dropna()  # capabilities evals dataframe
    safety_names = [col for col in normalized_df.columns if col not in cap_names]
    df_safety = normalized_df[safety_names].copy()  # "safety" evals dataframe

    # Compute the capabilities correlation matrix and summary statistics of the
    # pairwise correlations between capabilities benchmarks
    cap_matrix = df_cap.corr(method=correlation_type).to_numpy()
    triu_indices = np.triu_indices_from(cap_matrix, k=1)
    upper_tri_values = cap_matrix[triu_indices]
    mean_correlation = np.mean(upper_tri_values)
    std_dev_correlation = np.std(upper_tri_values)

    # Compute capabilities scores: project each model onto the first principal
    # component of the correlation matrix (np.linalg.eigh returns eigenvalues
    # in ascending order, so PC1 is the last column)
    eigenvals, eigenvecs = np.linalg.eigh(cap_matrix)
    pc = eigenvecs[:, -1]
    pc = pc if np.abs(pc).max() == pc.max() else -pc  # fix the sign so the largest loading is positive
    model_cap_score = df_cap.to_numpy() @ pc

    # Update dataframes with capabilities scores; aligning on df_cap.index keeps
    # the assignment correct even if dropna() removed rows above
    df_safety["cap_score"] = pd.Series(model_cap_score, index=df_cap.index)
    df_cap["cap_score"] = model_cap_score
    evals_df_copy = evals_df.copy()

    # Compute correlations for each evaluation and update the dataframes
    for safety_name in safety_names:
        safety_task_df = df_safety[[safety_name]].dropna()  # drop NaNs
        score_df = df_safety[["cap_score"]].loc[safety_task_df.index]
        reduced_df = pd.concat([safety_task_df, score_df], axis=1)  # concatenate with capabilities scores
        corr_value = reduced_df.corr(method=correlation_type).to_numpy()[0, 1]  # 2x2 matrix; take entry (0, 1)
        evals_df_copy.loc[safety_name, f"{label}_{correlation_type}_correlations"] = corr_value  # populate correlation column
    for cap_name in cap_names:
        cap_task_df = df_cap[[cap_name]].dropna()
        score_df = df_cap[["cap_score"]].loc[cap_task_df.index]
        reduced_df = pd.concat([cap_task_df, score_df], axis=1)
        corr_value = reduced_df.corr(method=correlation_type).to_numpy()[0, 1]
        evals_df_copy.loc[cap_name, f"{label}_{correlation_type}_correlations"] = corr_value

    # Calculate the total variance explained by PC1
    variance_explained_pc1 = eigenvals[-1] / np.sum(eigenvals)

    # Print analysis results
    print(f"\n***** Results for {label} *****")
    print(f"Mean correlation between two capabilities benchmarks: {mean_correlation*100:.1f}")
    print(f"Standard deviation of correlation between two capabilities benchmarks: {std_dev_correlation*100:.1f}")
    print(f"Total capabilities variance explained from PC1: {variance_explained_pc1*100:.1f}")
    print("\nCAPABILITIES CORRELATIONS:")
    for cap_name in cap_names:
        print(f"{cap_name} {100*evals_df_copy.loc[cap_name, f'{label}_{correlation_type}_correlations']:.1f}")
    print("\nSAFETY CORRELATIONS:")
    for safety_name in safety_names:
        print(f"{safety_name} {100*evals_df_copy.loc[safety_name, f'{label}_{correlation_type}_correlations']:.1f}")
    print("\nMODEL SCORES:")
    # Pair scores with the models that actually have complete capability data
    model_cap_dict = dict(sorted(zip(df_cap.index, model_cap_score), key=lambda item: item[1]))
    for k, v in model_cap_dict.items():
        print(f"{k}, {v:.2f}")

    # Update the model dataframe with scores (again aligned on df_cap.index)
    model_df_copy = model_df.copy()
    model_df_copy["score"] = pd.Series(model_cap_score, index=df_cap.index)
    return evals_df_copy, model_df_copy, eigenvals, cap_matrix
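

# The PCA step above reduces to: eigendecompose the capability correlation
# matrix, take its top eigenvector, and project each model's normalized scores
# onto it. A self-contained toy sketch of that step (hypothetical helper and
# synthetic data, not part of the original analysis):
def _demo_pc1_capability_score():
    """Illustrate the capability-score projection on synthetic data."""
    rng = np.random.default_rng(0)
    shared = rng.normal(size=16)  # latent "capability" factor shared by all benchmarks
    toy = pd.DataFrame({
        "bench_a": shared + 0.3 * rng.normal(size=16),
        "bench_b": shared + 0.3 * rng.normal(size=16),
        "bench_c": shared + 0.3 * rng.normal(size=16),
    })
    toy = (toy - toy.mean()) / toy.std(ddof=0)
    eigenvals, eigenvecs = np.linalg.eigh(toy.corr().to_numpy())
    pc1 = eigenvecs[:, -1]
    pc1 = pc1 if np.abs(pc1).max() == pc1.max() else -pc1  # same sign convention as run_analysis
    return toy.to_numpy() @ pc1  # one score per toy "model"
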
evals_df, base_model_df, base_eigenvals, base_cap_matrix = run_analysis(base_model_df, evals_df, cap_names, "Base", "spearman")
evals_df, chat_model_df, chat_eigenvals, chat_cap_matrix = run_analysis(chat_model_df, evals_df, cap_names, "Chat", "spearman")
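
# Persisting the augmented tables is left to the caller; a minimal sketch
# (the output path is an assumption, not part of the original pipeline):
# evals_df.to_csv('data/benchmarks_info_with_correlations.csv')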


##### PLOTTING DEMOS #####

def plot_capabilities_score(model_df):
    sorted_df = model_df.sort_values('score')
    plt.figure(figsize=(10, 7))
    ax = sns.scatterplot(
        data=sorted_df,
        y='score',
        x=range(len(sorted_df)),
        color='royalblue',
        s=80,
    )
    ax.set_ylabel('Capabilities Score', fontsize=20)
    ax.tick_params(axis='x', labelsize=14, rotation=90)
    ax.tick_params(axis='y', labelsize=14)
    ax.set_xticks(range(len(sorted_df)))
    ax.set_xticklabels(sorted_df.index)
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.tight_layout()
    plt.show()

def plot_eigenvalues(base_eigenvals, chat_eigenvals):
    # np.linalg.eigh returns eigenvalues in ascending order, so reverse them
    # to plot from largest to smallest
    plt.plot(base_eigenvals[::-1], label='Base Eigenvalues', color='blue')
    plt.plot(chat_eigenvals[::-1], label='Chat Eigenvalues', color='orange')
    plt.title('Eigenvalues of the Capability Correlation Matrix')
    plt.xlabel('Index')
    plt.ylabel('Eigenvalue')
    plt.grid(True)
    plt.legend()
    plt.show()

def plot_safety_vs_capabilities(model_df, x_axis, benchmark_name, class_type, y_label, x_label, color, title, xlim):
    gscores = model_df[x_axis].to_numpy()
    benchmark_scores = model_df[benchmark_name].to_numpy()
    fig, ax = plt.subplots(1, 1)
    ax.scatter(gscores, benchmark_scores, color=color, s=10)
    sns.regplot(x=x_axis, y=benchmark_name, data=model_df, ax=ax, color=color, scatter_kws={'s': 10}, label=class_type)
    ax.grid(linestyle='--')
    ax.set_ylabel(y_label, fontsize=18)
    ax.set_xlabel(x_label, fontsize=18)
    ax.tick_params(axis='x', labelsize=14)
    ax.tick_params(axis='y', labelsize=14)
    # If the x-axis ticks are fractional (e.g. accuracies in [0, 1]), relabel
    # them as percentages
    tick_values = ax.get_xticks()
    if any(tick % 1 != 0 for tick in tick_values):
        tick_labels = [int(tick * 100) for tick in tick_values]
        ax.set_xticks(tick_values)
        ax.set_xticklabels(tick_labels)
    ax.set_xlim([min(gscores) - xlim, max(gscores) + xlim])
    ax.set_title(title, fontsize=20)
    fig.tight_layout()
    plt.show()
    pearson_corr, _ = stats.pearsonr(gscores, benchmark_scores)
    spearman_corr, _ = stats.spearmanr(gscores, benchmark_scores)
    print(f"Pearson Correlation: {pearson_corr:.3f}")
    print(f"Spearman Correlation: {spearman_corr:.3f}")

def plot_capabilities_correlation_matrix(cap_matrix, label):
    fig, ax = plt.subplots(figsize=(9, 9))
    ax.imshow(cap_matrix, cmap="viridis", vmin=-0.5, vmax=1.1)
    # Annotate each cell with the correlation expressed as a percentage
    for i in range(len(cap_names)):
        for j in range(len(cap_names)):
            ax.text(j, i, f"{(cap_matrix[i, j]*100):.0f}", ha="center", va="center", color="w", fontsize=20)
    # Benchmark display names come from the module-level evals_df
    ax.set_xticks(np.arange(len(cap_names)))
    ax.set_yticks(np.arange(len(cap_names)))
    ax.set_xticklabels([evals_df.loc[name]["name"] for name in cap_names], fontsize=18)
    ax.set_yticklabels([evals_df.loc[name]["name"] for name in cap_names], fontsize=18)
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")
    ax.set_title(label, fontsize=35, pad=15)
    fig.tight_layout()
    plt.show()

plot_capabilities_score(chat_model_df)
plot_eigenvalues(base_eigenvals, chat_eigenvals)
plot_safety_vs_capabilities(
    chat_model_df,
    x_axis="mmlu",
    benchmark_name="rmsce_mmlu",
    class_type="chat",
    y_label="1 - RMS Calibration Error (↑)",
    x_label="MMLU",
    color="green",
    title="RMSCE MMLU",
    xlim=0.05,
)
plot_safety_vs_capabilities(
    chat_model_df,
    x_axis="score",
    benchmark_name="truthfulqa_mc1",
    class_type="chat",
    y_label="Accuracy (↑)",
    x_label="Capabilities Score",
    color="red",
    title="TruthfulQA MC1",
    xlim=1,
)
plot_capabilities_correlation_matrix(chat_cap_matrix, "Chat Capabilities Correlations")
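
# The same plots can be produced for the base models, mirroring the chat-model
# calls above (a usage sketch):
# plot_capabilities_score(base_model_df)
# plot_capabilities_correlation_matrix(base_cap_matrix, "Base Capabilities Correlations")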