-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathportaldatatransform.py
41 lines (34 loc) · 1.23 KB
/
portaldatatransform.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#!/usr/bin/env python2.7
import csv
import pprint
import math
## This code converts the data portal metadata csv file into JSON format that can be more easily
## used with d3
## A function that checks whether the category in each row is the first
## instance of that category or has been created already.
def check_categories(d,category):
    """Return the index of the entry in ``d`` whose 'name' equals ``category``.

    ``d`` is a list of dicts that each carry a 'name' key. The first matching
    entry wins. When no entry matches (including when ``d`` is empty), -1 is
    returned, which is the sentinel the calling code compares against.
    """
    for i, entry in enumerate(d):
        if entry['name'] == category:
            return i
    ## No entry with this category name exists yet.
    return -1
## One {'name': <category>, 'children': [...]} entry per distinct category
out = []
## Open the csv file with the data portal
with open('chicagometadata2.csv','r') as f:
    csvf = csv.reader(f)
    ## Skip the header row. The next() builtin works on Python 2.6+ and 3
    ## (reader.next() exists only on Python 2), and the None default keeps
    ## an empty file from raising StopIteration.
    next(csvf, None)
    for row in csvf:
        category = row[1]
        name = row[0]
        link = row[5]
        size = int(row[6])
        ## the size of the dataset is being log-transformed because there's such a huge difference
        ## between the largest datasets and the smallest datasets on the portal. Using log-transformed values
        ## will allow for a better visualization
        ## NOTE: math.log raises ValueError when size is 0 or negative.
        logsize = math.log(size)
        child = {'name': name, 'value': size, 'link': link, 'log': logsize }
        index = check_categories(out,category)
        if index == -1:
            ## First dataset seen for this category: start a new group.
            out.append({'name': category, 'children': [ child ] })
        else:
            out[index]['children'].append(child)
for i in out:
    print(i)