-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutil.py
322 lines (289 loc) · 13.8 KB
/
util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
# coding=utf-8
# Utilities for working with reformatted output CSVs and plotting
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Column lists for quicker access
class col:
    """Named groups of column labels, used to select related columns at once.

    Every attribute is a plain list of column-name strings. The composite
    groups at the bottom are concatenations of the basic groups above them.
    """
    # Basic groups
    stencil = ["stencil"]
    gridtype = ["unstructured"]
    variant = ["variant"]
    z_curves = ["z-curves"]
    no_chase = ["no-chase"]
    comp = ["comp"]
    size = ["size-x", "size-y", "size-z"]
    threads = ["threads-x", "threads-y", "threads-z"]
    blocks = ["blocks-x", "blocks-y", "blocks-z"]
    measurements = ["min", "max", "avg", "median"]
    # Composite groups (each one is a fresh list, order matters)
    launchconfig = [*threads, *blocks]
    access = [*gridtype, *variant]
    storage = [*z_curves, *no_chase, *comp]
    category = [*stencil, *access, *storage]
    problem = [*stencil, *size, *gridtype]
    benchmark = [*problem, *variant, *storage]
    a = [*benchmark, *threads, *measurements]
# Return dataframe reduced to one row per group: the row with the minimal value in the "minimize" column
def groupmin(df, by=col.problem, minimize="median"):
    """Keep, for every group of `by`, only the row with the smallest `minimize` value.

    The index is renumbered (0..n-1) before the lookup, so the returned rows
    carry positional labels rather than the labels of the input frame.
    Groups appear in order of first occurrence (the groupby is unsorted).
    """
    renumbered = df.reset_index(drop=True)
    best_rows = renumbered.groupby(by, sort=False)[minimize].idxmin()
    return renumbered.loc[best_rows]
# Return series of relative values relative to minimum element in same group
def relmingroup(df, by=col.problem, to="median"):
    """Return each row's `to` value divided by the smallest `to` value in its `by` group.

    The per-group minima come from groupmin(); the division itself is
    delegated to relgroup(), whose result is indexed like `df`.
    """
    return relgroup(df, groupmin(df, by, minimize=to), by, to)
# Return series of relative values relative to specific element per group
def relgroup(df, df_base, by=col.stencil+col.size, to="median"):
    """Return the `to` column of `df` divided by the matching baseline value.

    df:      frame whose `to` values are to be made relative
    df_base: frame providing exactly one baseline row per `by` key
    by:      list of columns identifying which baseline row belongs to a row of df
    to:      name of the value column (must exist in both frames)

    Returns a Series indexed like `df`. Rows of `df` without a baseline match
    yield NaN (left join).

    Raises AssertionError if `df_base` holds more than one row for some `by`
    key, because the join would then duplicate rows of `df`.
    """
    merged = df.merge(df_base[by + [to]], on=by, how="left")
    # Explicit raise instead of a bare assert so the check survives `python -O`.
    if len(merged) != len(df):
        raise AssertionError(
            "df_base has multiple rows for at least one `by` key; "
            "reduce it to one row per key first (e.g. aggregate with .min())")
    # merge() renumbers rows; restore the caller's labels so the result aligns with df
    merged.index = df.index
    # `to` exists on both sides of the join, so pandas suffixes it with _x (df) and _y (df_base)
    return merged[to + "_x"] / merged[to + "_y"]
# Setup ultimate dataframe
def setup():
    """Load the three reformatted result CSVs and return them as one frame sorted by median.

    Reads from the relative directory "results/"; the measurement columns of
    the 64-sized file are divided by 1000 before concatenation.
    """
    large = pd.read_csv("results/ultimate-reformat.csv")
    medium = pd.read_csv("results/ultimate-128-reformat.csv")
    small = pd.read_csv("results/ultimate-64-reformat.csv")
    # NOTE(review): the original remark here read "micro to nanoseconds", but a
    # division by 1000 goes the other way -- presumably this file is in ns and
    # is brought to the microseconds used by the other files; TODO confirm.
    small[col.measurements] = small[col.measurements] / 1000
    combined = pd.concat([large, medium, small], ignore_index=True)
    return combined.sort_values("median")
# ####################
# NAME PRETTY PRINTING
# ####################
# Definitions for prettyprinting
# Human-readable names of the stencils (key: value of the "stencil" column)
stencil_names = {
    "laplap": "Laplace-of-Laplace",
    "hdiff": "Horizontal diffusion",
    "fastwaves": "Fastwaves",
}

# Human-readable names of the variants (key: value of the "variant" column)
variant_names = {
    "idxvar": "index variables",
    "idxvar-kloop": "z-loop",
    "idxvar-kloop-sliced": "sliced z-loop",
    "idxvar-shared": "shared",
}

# Axis labels for the plots (key: column name)
column_titles = {
    "pretty": "Benchmark",
    "size-x": "Domain size (X)",
    "size-y": "Domain size (Y)",
    "size-z": "Domain size (Z)",
    "blocks-x": "Number of blocks (X)",
    "blocks-y": "Number of blocks (Y)",
    "blocks-z": "Number of blocks (Z)",
    "threads-x": "Number of threads (X)",
    "threads-y": "Number of threads (Y)",
    "threads-z": "Number of threads (Z)",
    "size-prod": "Domain size",
    "blocks-prod": "Number of blocks",
    "threads-prod": "Number of threads",
    "avg": "Average runtime [μs]",
    "median": "Median runtime [μs]",
    "min": "Minimal runtime [μs]",
    "max": "Maximal runtime [μs]",
    "rel": "Runtime [relative to baseline]",
}
# Pretty-print names for graphs
# cols: Columns to be considered for generating pretty-print name (make sure to exclude any measurement values that vary for rows of the same data set)
# The cols array also prescribes the order of the pretty-printed name
# join: How the name parts built from the columns are joined
# fmt: default format to be applied for columns with no formatter specified in formatters
# formatters: dict column_name -> function(column_value, row); the function must return a string that will be used as the pretty print value for that column (or None to leave that column out of the name)
def pretty(df, cols=col.category, **kwargs):
    """Return one pretty-printed name per row, built from the columns that vary.

    Only columns of `cols` that exist in `df` and hold more than one distinct
    value take part; the remaining keyword arguments are handed to pretty_cb().
    """
    varying = []
    for name in cols:
        if name in df and len(np.unique(df[name])) > 1:
            varying.append(name)

    def describe(row):
        return pretty_cb(row, varying, **kwargs)

    return df.apply(describe, axis=1)
# Graph title: all unvarying components of names, equal for entire graph
def title(df, cols=col.category, **kwargs):
    """Return the name shared by all rows, built from the columns that do not vary.

    Only columns of `cols` that exist in `df` and hold exactly one distinct
    value take part; the text is produced by pretty_cb() from the first row.
    """
    constant = []
    for name in cols:
        if name in df and len(np.unique(df[name])) == 1:
            constant.append(name)
    first_row = df.iloc[0]
    return pretty_cb(first_row, constant, **kwargs)
def pretty_cb(row, cols=col.category, fmt="{1}", join=", ",
              formatters={
                  "unstructured": lambda x, r: "unstructured" if x else "regular",
                  "z-curves": lambda x, r: ("z-curves" if x else "row-major") if not ("unstructured" in r and not r["unstructured"]) else None,
                  "no-chase": lambda x, r: ("non-chasing" if x else "chasing") if not ("unstructured" in r and not r["unstructured"]) else None,
                  "comp": lambda x, r: ("compressed" if x else "uncompressed") if not ("unstructured" in r and not r["unstructured"]) else None,
                  "stencil": lambda x, r: stencil_names[x] if x in stencil_names else x.capitalize(),
                  "variant": lambda x, r: variant_names[x] if x in variant_names else x,
                  "size-x": lambda x, r: "{0}×{1}×{2}".format(x, r["size-y"], r["size-z"]) if "size-y" in r and "size-z" in r else None,
                  "size-y": lambda x, r: None,
                  "size-z": lambda x, r: None,
              }):
    """Build the pretty-printed name of a single row.

    row:        one row (anything supporting `name in row` and `row[name]`)
    cols:       columns to describe, in output order; columns missing from
                `row` are skipped
    fmt:        format string for columns without a formatter; it is formatted
                with (column name, value), so "{1}" is the bare value
    join:       separator placed between the parts
    formatters: dict column name -> function(value, row) returning the text
                for that column; a falsy result (None, "") drops the part

    With the default formatters, the storage flags (z-curves, no-chase, comp)
    produce no text when the row has a falsy "unstructured" entry, and the
    size is rendered once, by "size-x", as X×Y×Z.
    """
    parts = []
    for name in cols:
        if name not in row:
            continue
        value = row[name]
        if name in formatters:
            text = formatters[name](value, row)
        else:
            text = fmt.format(name, value)
        parts.append(text)
    return join.join(part for part in parts if part)
# ##############
# PLOTTING
# ##############
# Setup common plot params
def plotinit():
    """Select the seaborn look for all subsequently created plots.

    matplotlib 3.6 deprecated the bundled style name "seaborn" in favour of
    "seaborn-v0_8", and later releases no longer accept the old name. Use
    whichever of the two the installed matplotlib offers.
    """
    style = "seaborn" if "seaborn" in plt.style.available else "seaborn-v0_8"
    plt.style.use(style)
def plotdone(fig=None, legend=2, w=6, h=4):
    """Finish a figure: set its size, add the legend, tighten the layout and show it.

    fig:    figure to finish; defaults to the current figure
    legend: legend position, passed to plotlegend() as `pos`
    w, h:   figure width and height in inches

    Returns the figure.
    """
    if fig is None:
        fig = plt.gcf()
    fig.set_size_inches(w, h)
    plotlegend(fig.gca(), legend)
    # Lay out the figure that was passed in; plt.tight_layout() would act on
    # whatever figure happens to be current instead.
    fig.tight_layout()
    fig.show()
    return fig
def plotsave(f, fig=None):
    """Tighten the layout of a figure and write it to `f` at 300 dpi.

    f:   destination handed to Figure.savefig()
    fig: figure to save; defaults to the current figure

    Returns the figure.
    """
    if fig is None:
        fig = plt.gcf()
    # Lay out the figure being saved; plt.tight_layout() would act on
    # whatever figure happens to be current instead.
    fig.tight_layout()
    fig.savefig(f, dpi=300)
    return fig
def plotlegend(ax=None, pos=2, **kwargs):
    """Add a legend to `ax` (default: the current axes) at a preset position.

    pos == 0: let matplotlib choose the legend placement
    pos == 1: two columns, upper-left corner anchored at axes point (0, 1)
    other:    one column, upper-left corner anchored at axes point (1, 1)

    Extra keyword arguments are forwarded to ax.legend().
    """
    ax = ax or plt.gca()
    if pos == 0:
        ax.legend(**kwargs)
        return
    anchor, columns = ((0, 1), 2) if pos == 1 else ((1, 1), 1)
    ax.legend(loc="upper left", bbox_to_anchor=anchor, ncol=columns, **kwargs)
# Line plot
def lineplot(df, by=col.category, x="threads-z", y="median",
             label="pretty", color="color", marker="marker", ax=None,
             xticks=True,
             **kwargs):
    """Draw one line per `by` group, showing the best `y` for every `x`.

    df:     frame with the `by`, `x` and `y` columns
    by:     list of columns; every distinct combination becomes one line
    x, y:   columns for the horizontal / vertical axis
    label:  column holding the legend label; created with pretty() if absent
    color, marker: columns holding the per-row color / marker; if either is
            absent, both are generated with add_colors_markers()
    ax:     axes to draw on; defaults to the current axes
    xticks: if true, place a tick at every distinct value of `x`
    kwargs: forwarded to ax.plot()

    Returns the axes that were drawn on.
    """
    if ax is None:
        ax = plt.gca()
    if color not in df.columns or marker not in df.columns:
        df = add_colors_markers(df)
        color = "color"
        marker = "marker"
    if label not in df.columns:
        df = df.copy()
        df[label] = pretty(df)
    mins = groupmin(df, by + [x], minimize=y)  # ensure one point per line & X
    for _, data in mins.groupby(by):
        # groupmin() keeps the rows in input order, which need not be ordered
        # by x; sort so each line runs left to right instead of zig-zagging.
        data = data.sort_values(x)
        ax.plot(data[x], data[y],
                label=data[label].iat[0] if label in data else None,
                color=data[color].iat[0] if color in data else None,
                marker=data[marker].iat[0] if marker in data else None,
                **kwargs)
    if xticks:
        ax.set_xticks(np.unique(df[x]))
    ax.set_xlabel(column_titles[x] if x in column_titles else x)
    ax.set_ylabel(column_titles[y] if y in column_titles else y)
    ax.set_title(title(mins))
    ax.grid(axis="both")
    return ax
# Grouped bars plot
# cat: categories, these are differentiated by colors + legend
# grp: groups, these appear together as bars next to each other
def barplot(df, cat=col.access, grp=col.storage + col.gridtype, y="median",
            color="color", ax=None,
            w=1, s=1.6, tickrot=15,
            grp_pretty=None, cat_pretty=None, **kwargs):
    """Draw a grouped bar chart of the best `y` per (category, group).

    df:         frame with the `cat`, `grp` and `y` columns
    cat:        list of columns; each combination is one legend entry
    grp:        list of columns; each combination is one cluster of adjacent bars
    y:          column giving the bar height
    color:      column holding the bar color; generated with
                add_colors_markers() if absent
    ax:         axes to draw on; defaults to the current axes
    w:          horizontal distance between neighbouring bars of one cluster
    s:          extra horizontal distance between clusters
    tickrot:    rotation of the cluster labels in degrees
    grp_pretty: columns used for the cluster labels (default: `grp`)
    cat_pretty: columns used for the legend labels (default: `cat`)
    kwargs:     accepted but not used by the current implementation

    Returns the axes that were drawn on.
    """
    if not ax:
        ax = plt.gca()
    if color not in df.columns:
        df = add_colors_markers(df)
        color = "color"
    if not grp_pretty:
        grp_pretty = grp
    if not cat_pretty:
        cat_pretty = cat
    mins = groupmin(df, cat + grp, minimize=y)  # ensure one point per bar
    mins = mins.reset_index(drop=True)  # ensure one entry per index only (later index is used to identify rows)
    groups = mins.groupby(grp, sort=False)
    group_counts = groups[y].count()      # bars per cluster
    group_inner_no = groups.cumcount()    # position of each row inside its cluster
    group_numbers = pd.Series(np.arange(0, len(group_counts)), index=group_counts.index)
    # Number of bars in all preceding clusters (cumulative count shifted by one)
    group_offsets = pd.Series(np.roll(group_counts.cumsum(), 1), index=group_counts.index)
    group_offsets.iloc[0] = 0
    group_labels = pretty(group_counts.reset_index(), cols=grp_pretty)
    categories = mins.groupby(cat, as_index=False)
    category_labels = pretty(mins, cols=cat_pretty)
    for _, data in categories:
        # Keep this a Series aligned with data.index: a plain Python list
        # would be *repeated* by `* w` (or raise for a float w) instead of
        # being scaled, which broke every bar width other than w == 1.
        inner_no = group_inner_no.loc[data.index]
        group_no = data[grp].apply(lambda x: group_numbers.loc[tuple(x[grp])], axis=1)
        group_offs = data[grp].apply(lambda x: group_offsets.loc[tuple(x[grp])], axis=1)
        xs = group_no*s + group_offs*w + inner_no*w
        label = category_labels.loc[data.index[0]]
        ax.bar(x=xs.values, height=data[y].values, label=label,
               color=data[color] if color in data else None)
    # One tick per cluster, centred under its bars
    ax.set_xticks(group_numbers*s + group_offsets*w + (group_counts-1)*w*0.5)
    if tickrot == 0:
        ax.set_xticklabels(group_labels, rotation=0, horizontalalignment="center")
    else:
        ax.set_xticklabels(group_labels, rotation=tickrot, horizontalalignment="right")
    ax.set_ylabel(column_titles[y] if y in column_titles else y)
    ax.set_title(title(mins))
    ax.grid(axis="x")
    return ax
# ##############
# COLORS/MARKERS
# ##############
# Return dataframe of markers/colors
def dfmap(df, cols, mapping, default=""):
    """Translate every row of `df` through a lookup table.

    The values of `cols` (a list of column names) are packed into a tuple,
    in that order, and looked up in `mapping`; rows whose tuple is not a key
    receive `default`. Returns the result of the row-wise apply, indexed
    like `df`.
    """
    def lookup(row):
        key = tuple(row)
        return mapping[key] if key in mapping else default

    return df[cols].apply(lookup, axis=1)
# Add markers/colors to dataframe
def add_colors_markers(df, color=None, marker=None, default=""):
    """Return a copy of `df` with a "color" and a "marker" column added.

    color:   "variant", "stencil" or "storage" -- the property that decides
             the color. If not given, the property with the most distinct
             values in `df` is chosen (ties: variant, then stencil, then storage).
    marker:  same choices, for the marker; defaults to the value of `color`
    default: value used for both columns when the property name is none of
             the three known ones

    The input frame is left unmodified.
    """
    if not color:
        distinct = {
            "variant": len(np.unique(df[col.variant])),
            "stencil": len(np.unique(df[col.stencil])),
            "storage": len(np.unique(df[col.storage], axis=0)),  # distinct rows
        }
        # max() returns the first key with the highest count, which gives the
        # tie-break order variant > stencil > storage.
        color = max(distinct, key=distinct.get)
    if marker is None:
        marker = color

    def constant(_df):
        return default

    color_funcs = {"variant": colors_variant,
                   "stencil": colors_stencil,
                   "storage": colors_storage}
    marker_funcs = {"variant": markers_variant,
                    "stencil": markers_stencil,
                    "storage": markers_storage}
    tmp = df.copy()
    tmp["color"] = color_funcs.get(color, constant)(df)
    tmp["marker"] = marker_funcs.get(marker, constant)(df)
    return tmp
# Variant colors and markers; use col.variant
def colors_variant(df):
    """Return the color for every row of `df`, chosen by its variant.

    Variants without an entry get dfmap()'s default (empty string).
    """
    by_name = {
        "naive": "C1",
        "idxvar": "C2",
        "idxvar-kloop": "C3",
        "idxvar-kloop-sliced": "C4",
        "idxvar-shared": "C5",
        "regular": "black",
    }
    # dfmap() looks rows up by tuple, so wrap every name in a 1-tuple
    palette = {(name,): shade for name, shade in by_name.items()}
    return dfmap(df, cols=col.variant, mapping=palette)
def markers_variant(df):
    """Return the marker for every row of `df`, chosen by its variant.

    Variants without an entry get dfmap()'s default (empty string).
    """
    by_name = {
        "naive": "o",
        "idxvar": "*",
        "idxvar-kloop": "v",
        "idxvar-kloop-sliced": "^",
        "idxvar-shared": "P",
    }
    # dfmap() looks rows up by tuple, so wrap every name in a 1-tuple
    symbols = {(name,): symbol for name, symbol in by_name.items()}
    return dfmap(df, cols=col.variant, mapping=symbols)
def colors_stencil(df):
    """Return the color for every row of `df`, chosen by stencil and grid type.

    Combinations without an entry get dfmap()'s default (empty string).
    """
    # stencil -> (color when "unstructured" is False, color when it is True)
    per_stencil = {
        "hdiff": ("C1", "C2"),
        "laplap": ("C3", "C4"),
        "fastwaves": ("C5", "C6"),
    }
    palette = {(name, unstructured): shade
               for name, pair in per_stencil.items()
               for unstructured, shade in zip((False, True), pair)}
    return dfmap(df, cols=col.stencil + col.gridtype, mapping=palette)
def markers_stencil(df):
    """Return the marker for every row of `df`, chosen by stencil and grid type.

    Combinations without an entry get dfmap()'s default (empty string).
    """
    # stencil -> (marker when "unstructured" is False, marker when it is True)
    per_stencil = {
        "hdiff": ("o", "*"),
        "laplap": ("v", "^"),
        "fastwaves": ("P", "8"),
    }
    symbols = {(name, unstructured): symbol
               for name, pair in per_stencil.items()
               for unstructured, symbol in zip((False, True), pair)}
    return dfmap(df, cols=col.stencil + col.gridtype, mapping=symbols)
def colors_storage(df):
    """Return the color for every row of `df`, chosen by its storage flags.

    Keys are (z-curves, no-chase, comp) triples; all eight combinations
    of the three booleans have an entry.
    """
    storage_columns = col.z_curves + col.no_chase + col.comp
    palette = {
        (False, False, False): "C1",
        (False, False, True): "C2",
        (False, True, False): "C3",
        (True, False, False): "C4",
        (True, True, False): "C5",
        (False, True, True): "C6",
        (True, False, True): "grey",
        (True, True, True): "darkorange",
    }
    return dfmap(df, cols=storage_columns, mapping=palette)
def markers_storage(df):
    """Return the marker for every row of `df`, chosen by its storage flags.

    Keys are (z-curves, no-chase, comp) triples; all eight combinations
    of the three booleans have an entry.
    """
    storage_columns = col.z_curves + col.no_chase + col.comp
    symbols = {
        (False, False, False): "o",
        (False, False, True): "*",
        (False, True, False): "v",
        (True, False, False): "^",
        (True, True, False): "P",
        (False, True, True): "8",
        (True, False, True): "p",
        (True, True, True): "H",
    }
    return dfmap(df, cols=storage_columns, mapping=symbols)