-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathGraph.py
70 lines (66 loc) · 3.11 KB
/
Graph.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import matplotlib.pyplot as plt
from collections import Counter
from typing import *
def findQuartiles(pool, val_increment=1.0)->Tuple[float, float, float]:
# Find quartiles by graph area method.
# IE first quartile should have 1/4 of the total bar graph area to the left
# This assumes all bars are 1 unit wide. In cases where the histogram is regularly gappy,
# eg. when there are only EVEN bars, it might be argued that the interpolated values should
# be able to appear in the gaps, which this function will not do.
l = len(pool)
counter = Counter(pool)
def helper(frac)->float:
# Return a value such that frac amount of graph area is to the left of the argument
area_target = l * frac
area = 0.0
val = min(counter)
while True:
new_area = area + counter[val]
if new_area > area_target:
# This iteration would put us over the area_target
break
area = new_area
val += 1
# val represents the bar which must now be sub-divided
bar_height = counter[val] / val_increment
bar_left_edge = val - (0.5*val_increment)
area_delta = area_target - area
rval = bar_left_edge + (area_delta / bar_height)
return rval
return helper(0.25), helper(0.5), helper(0.75)
def determineIncrement(counter:Counter)->int:
    """Return the largest integer step that fits every gap between values.

    The keys of ``counter`` are sorted and the differences between adjacent
    keys are collected; the result is the greatest common divisor of those
    differences, i.e. the widest bar spacing on which every key still lands.

    Args:
        counter: Mapping whose keys are the distinct integer values seen.

    Returns:
        The greatest common divisor of the gaps between adjacent keys, or 1
        when there are fewer than two distinct keys (no gap to measure).
    """
    from functools import reduce
    from math import gcd

    sorted_unique_vals = sorted(counter)
    if len(sorted_unique_vals) < 2:
        return 1
    # Keys are distinct and sorted, so every delta is strictly positive.
    deltas = (
        upper - lower
        for lower, upper in zip(sorted_unique_vals, sorted_unique_vals[1:])
    )
    return reduce(gcd, deltas)
def makeBarGraph(pool, increment=None):
    """Show a percentage bar chart of ``pool`` with its quartiles marked.

    Each distinct value gets one bar whose height is the share of the pool
    (in percent) taken by that value. Dashed vertical lines mark Q1, Q2 and
    Q3 as computed by ``findQuartiles``. The figure is displayed with
    ``plt.show()``.

    Args:
        pool: Collection of values to plot.
        increment: Spacing between adjacent bars. When ``None`` it is
            derived from the data by ``determineIncrement``.
    """
    tally = Counter(pool)
    if increment is None:
        # No spacing given: infer it from the gaps between observed values.
        increment = determineIncrement(tally)

    plt.figure(figsize=(10, 5))

    sample_count = len(pool)
    shares = [100*occurrences / sample_count for occurrences in tally.values()]
    # Bars are drawn slightly narrower than the spacing.
    plt.bar(tally.keys(), shares, color='skyblue', width=increment-0.025)

    # One dashed marker per quartile, each with its own colour and label.
    marker_styles = (
        ('green', 'Q1: %0.2f'),
        ('red', 'Q2: %0.2f'),
        ('blue', 'Q3: %0.2f'),
    )
    quartiles = findQuartiles(pool, increment)
    for position, (colour, label_format) in zip(quartiles, marker_styles):
        plt.axvline(x=position, color=colour, linestyle='--', label=label_format%position)

    plt.xlabel('Value')
    plt.ylabel('Frequency (%)')
    plt.title('Pool distribution for %d values'%sample_count)
    lowest, highest = min(tally), max(tally)
    plt.xticks(range(lowest, highest + 1, increment))
    plt.legend()
    plt.grid(axis='y', linestyle='--')
    plt.show()
if __name__ == "__main__":
    # Demo run: one chart per sample pool, shown in the order listed.
    # A further sample, [1]*10 + [2]*20 + [3]*10, is left disabled.
    demo_pools = (
        [1]*10 + [3]*20 + [5]*10,
        [1] * 10,
        [1] * 10 + [4] * 10 + [7] * 10,
        [1] * 10 + [4] * 10 + [7] * 10 + [9] * 10,
        [1] * 10 + [3] * 10 + [5] * 10,
    )
    for demo_pool in demo_pools:
        makeBarGraph(demo_pool)