"""
Quick utilities for cleaning up the source data.
"""
import csv
import json
import calculate
from pprint import pprint

class PalmeroFTW(object):
    """
    All the tricks.
    """
    primary_csv_path = "./input/votos_establecimiento_caba_paso.csv"
    general_csv_path = "./input/votos_establecimiento_cordoba_octubre.csv"
    location_json_path = "./input/locales_cordoba_geocode.geojson"
    # The party list codes we keep as columns in the output
    listas = [
        '003',
        '047',
        '191',
        '217',
        '501',
        '503',
        '505',
        '512',
        '514',
        '9003',
        '9004',
        '9005',
        '9006',
    ]
    outheaders = [
        'fake_id',
        # '187', # Partido Autodeterminacion y Libertad (dark blue)
        # '501', # Alianza Frente para la Victoria (light blue)
        # '502', # Alianza UNEN (green)
        # '503', # Alianza Union Pro (yellow)
        # '505', # Alianza Fte. de Izq. y de los Trabajadores (red)
        # '506', # Alianza Camino Popular (gray)
    ] + listas + ['overall_total']
    outcsv_path = "output/merged_totals.csv"
    outjson_path = "static/test.geojson"
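
    # Note: there is no single shared key between the results CSV and the
    # voting location GeoJSON, so both methods below build a "fake_id" from
    # the range of mesas (voting tables) a school covers. For example (an
    # invented value), mesa_desde=1 and mesa_hasta=10 become the key "1-10".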

    def merge(self):
        """
        Merge the transformed CSV file with the GeoJSON of all
        the voting locations.
        """
        # Open the raw GeoJSON
        json_data = open(self.location_json_path, "rb").read()
        # Parse it into Python
        json_data = json.loads(json_data)
        # Create a list for the features after we merge
        merged_features = []
        # Open the CSV with the results
        csv_data = csv.DictReader(open(self.outcsv_path, 'r'))
        # Key it by fake id
        csv_data = dict((
            i['fake_id'], i
        ) for i in csv_data)
        # Loop through the features
        for row in json_data['features']:
            # Figure out each fake id
            fake_id = "%s-%s" % (
                row['properties']['mesa_desde'],
                row['properties']['mesa_hasta']
            )
            try:
                results_data = csv_data[fake_id]
            except KeyError:
                print "No results data for mesas %s" % fake_id
                # Skip this feature so we don't reuse results from the
                # previous loop iteration
                continue
            # Filter it down to the data we want to keep
            merged_dict = {
                'geometry': row['geometry'],
                # 'id': row['id'],
                'properties': {
                    'direccion': row['properties']['direccion'],
                    'establecim': row['properties']['establecim'],
                    'seccion': row['properties']['seccion'],
                    'circuito': row['properties']['circuito'],
                    'overall_total': int(results_data['overall_total']),
                    'fake_id': results_data['fake_id'],
                },
                'type': 'Feature'
            }
            # Attach the per-party vote totals
            merged_dict['properties']["votos"] = {}
            for party in self.listas:
                merged_dict['properties']["votos"][party] = int(results_data[party])
            # Toss it in the global list
            merged_features.append(merged_dict)
        # Structure our new merged GeoJSON
        new_json = {
            'type': json_data['type'],
            'features': merged_features
        }
        # Write it out to a file
        outjson = open(self.outjson_path, "wb")
        outjson.write(json.dumps(new_json))
        outjson.close()
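
    # For reference, each merged feature comes out shaped roughly like the
    # sketch below. The values are invented for illustration; the real
    # geometry and properties come from the source GeoJSON and results CSV.
    #
    #   {
    #       'type': 'Feature',
    #       'geometry': {...},
    #       'properties': {
    #           'direccion': '...',
    #           'establecim': '...',
    #           'seccion': '...',
    #           'circuito': '...',
    #           'fake_id': '1-10',
    #           'overall_total': 2345,
    #           'votos': {'003': 12, '047': 34, ...},
    #       },
    #   }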

    def transform(self):
        """
        Transform the results file so that there is only one row
        for each precinct, with some summary stats calculated.
        """
        # Open the CSV
        general_csv = csv.DictReader(open(self.general_csv_path, 'r'))
        # Loop through the rows ...
        grouped_by_school = {}
        for row in general_csv:
            # ... and regroup them so each fake id is keyed to
            # all of the list totals for that precinct
            fake_id = "%s-%s" % (row['mesa_desde'], row['mesa_hasta'])
            try:
                grouped_by_school[fake_id][row['vot_parcodigo']] = row['total']
            except KeyError:
                grouped_by_school[fake_id] = {
                    row['vot_parcodigo']: row['total']
                }
        # Now loop through that
        outrows = []
        for fake_id, totals in grouped_by_school.items():
            # Figure out the overall total of votes
            overall_total = sum(map(int, totals.values()))
            # Start up a row to write out
            outrow = [fake_id]
            # Load in the list totals in the same order as the output
            # headers, defaulting to zero if a precinct has no votes for
            # a list, so the columns always line up with outheaders
            for list_ in self.listas:
                outrow.append(int(totals.get(list_, 0)))
            # Load in the extra stuff we've calculated
            outrow.append(overall_total)
            # Add this row to the global list outside the loop
            outrows.append(outrow)
        # Open up a text file and write out all the data
        outfile = open(self.outcsv_path, 'w')
        outcsv = csv.writer(outfile)
        outcsv.writerow(self.outheaders)
        outcsv.writerows(outrows)
        outfile.close()
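
    # The CSV that transform() writes (and merge() reads back) ends up with
    # one row per fake id: the id, the 13 list totals in the same order as
    # self.listas, then the overall total. An invented example row:
    #
    #   1-10,12,34, ... ,789,2345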


if __name__ == '__main__':
    pftw = PalmeroFTW()
    pftw.transform()
    pftw.merge()
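    # Optional sanity check (a minimal sketch): reload the merged GeoJSON we
    # just wrote and pretty-print one feature's properties to eyeball the
    # structure described above.
    merged = json.load(open(pftw.outjson_path, 'r'))
    print "Merged %s features" % len(merged['features'])
    if merged['features']:
        pprint(merged['features'][0]['properties'])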