-
Notifications
You must be signed in to change notification settings - Fork 0
/
Plotter.py
107 lines (100 loc) · 3.86 KB
/
Plotter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
from matplotlib.pyplot import ylabel
from imports import *
import numpy as np
class Plotter:
    """
    Plots averaged experiment results as line graphs with error bars.

    The results are read from a text file in which every non-blank line
    holds the whitespace-separated values of one run, and every N
    consecutive runs form one experiment (one plotted line).

    Attributes:
    -----------
    N: int
        The amount of runs to average the experiment over
    file_name: str
        The name of the file containing the data needed for plotting
    lines: list
        List of the different line-types to be plotted

    Methods
    -------
    plot_data(metric, seed_size, Q, ylabel, title, experiment_name, interval):
        Plots the data to a linegraph with errorbars
    file_to_arrays():
        Processes the strings out of a file to transform them to arrays
    compute_sd(experiment):
        Computes the standard deviation of all points based on an experiment of N runs
    """

    def __init__(self, N, file_name, lines) -> None:
        """
        Parameters:
        -----------
        N: int
            The amount of runs to average the experiment over
        file_name: str
            The name of the file containing the data needed for plotting
        lines: list
            List of the different line-types to be plotted; each entry is
            used for its get_color()/get_marker() values and as a legend
            handle (presumably matplotlib Line2D objects; verify against
            the caller)
        """
        self.N = N
        self.file_name = file_name
        self.lines = lines

    def plot_data(self, metric, seed_size, Q, ylabel, title, experiment_name, interval):
        """
        Plots the data to a linegraph with errorbars

        The figure is written to './Plots/<experiment_name><metric><Q>'.

        Parameters:
        -----------
        metric: str
            The metric used to evaluate performance; values are scaled by
            100 when this equals 'accuracy'
        seed_size: int
            The initial size of the labeled dataset
        Q: int
            The amount of examples queried in an active learning round
        ylabel: str
            The label of the y-axis
        title: str
            The title of the plot
        experiment_name: str
            The title of the experiment which will be used in the name of the save
        interval: sequence of int
            One step size per experiment; only every interval[i]-th point
            of experiment i is plotted
        """
        data = self.file_to_arrays()
        # `plt` is not imported in this class; it is expected to be in
        # module scope (presumably via `from imports import *`).
        fig, ax = plt.subplots()
        for i, experiment in enumerate(data):
            if metric == 'accuracy':
                experiment *= 100
            # NOTE(review): the axis ends at Q * <number of points>, which
            # only equals seed_size + Q * (points - 1) when seed_size == Q.
            # Kept as-is; confirm the intended end point.
            x = np.linspace(
                seed_size, Q * len(data[0][0]), num=len(experiment[0]))
            y = np.divide(np.sum(experiment, axis=0), self.N)
            sd = self.compute_sd(experiment)
            step = interval[i]
            style = self.lines[i]
            markers, caps, bars = ax.errorbar(
                x[::step], y[::step], sd[::step],
                color=style.get_color(), marker=style.get_marker())
            # Fade the error bars and their caps so the mean line stands out.
            for artist in (*bars, *caps):
                artist.set_alpha(0.5)
        ax.legend(handles=self.lines)
        ax.set_ylabel(ylabel)
        ax.set_xlabel(r'Size of $\mathcal{L}$')
        ax.set_title(title)
        # NOTE(review): the figure is left open after saving, so repeated
        # calls accumulate figures; close it here if no caller shows it.
        plt.savefig('./Plots/' + experiment_name + str(metric) + str(Q))

    def file_to_arrays(self):
        """
        Processes the strings out of a file to transform them to arrays

        Blank lines are skipped. Trailing runs that do not fill a complete
        group of N are dropped.

        Returns:
        --------
        data: list
            The arrays containing the values of the different runs/experiments;
            one array of N rows per experiment

        Raises:
        -------
        ValueError
            If N is smaller than 1
        """
        if self.N < 1:
            raise ValueError('N must be at least 1, got ' + str(self.N))
        data = []
        experiment = []
        with open(self.file_name, 'r') as f:
            for line in f:
                if not line.strip():
                    continue
                experiment.append(np.fromstring(line, sep=' '))
                # Grouping on the number of collected runs (rather than on
                # the line index) also flushes correctly when N == 1.
                if len(experiment) == self.N:
                    data.append(np.array(experiment))
                    experiment = []
        return data

    def compute_sd(self, experiment):
        """
        Computes and returns the standard deviation of all points based on an experiment of N runs

        Parameters:
        -----------
        experiment: array-like
            Two-dimensional data with one row per run and one column per point

        Returns:
        --------
        sds: list
            The standard deviation of every column; the squared deviations
            from the column mean are divided by N (not by N - 1)
        """
        runs = np.asarray(experiment, dtype=float)
        deviations = runs - runs.mean(axis=0)
        variances = np.sum(np.power(deviations, 2), axis=0) / self.N
        return list(np.sqrt(variances))