-
Notifications
You must be signed in to change notification settings - Fork 0
/
language-event-count-reducer.py
54 lines (40 loc) · 1.32 KB
/
language-event-count-reducer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#!/usr/bin/env python
import sys
import json
def reduce_language_events(lines):
    """Tally event counts per language from tab-separated input lines.

    Each non-blank line must hold exactly three tab-separated fields:
    ``repo``, ``language`` and a JSON object. The JSON object maps an event
    name to an object with the keys ``'count'``, ``'actors_count'`` and
    ``'actors'`` (an iterable of hashable values, presumably actor logins).

    Args:
        lines: Any iterable of text lines, e.g. ``sys.stdin``.

    Returns:
        A dict keyed by language. Each value is a dict with:
          * ``'events'``: event name -> ``{'count', 'actors_count'}``, each
            summed over every input line for that language.
          * ``'repos'``: number of distinct repos seen for the language.
          * ``'actors'``: number of distinct actors seen for the language.

    Raises:
        ValueError: if a non-blank line does not split into three fields or
            its third field is not valid JSON.
        KeyError: if an event object lacks one of the expected keys.
    """
    languages = {}
    for line in lines:
        cleanline = line.strip()
        if not cleanline:
            # A blank line carries no record; unpacking it below would
            # raise ValueError and abort the whole run.
            continue
        repo, language, events_json = cleanline.split('\t')
        events = json.loads(events_json)

        entry = languages.setdefault(
            language, {'events': {}, 'repos': set(), 'actors': set()})
        entry['repos'].add(repo)

        # .items() rather than .iteritems() so this runs on Python 2 and 3.
        for event, contents in events.items():
            entry['actors'].update(contents['actors'])
            totals = entry['events'].setdefault(
                event, {'count': 0, 'actors_count': 0})
            totals['count'] += contents['count']
            # NOTE: per-event actors_count is a plain sum across lines, so
            # an actor appearing on several lines is counted once per line
            # here; only the language-level 'actors' figure is deduplicated.
            totals['actors_count'] += contents['actors_count']

    # Sets are not JSON-serializable; collapse them to their sizes.
    for details in languages.values():
        details['repos'] = len(details['repos'])
        details['actors'] = len(details['actors'])
    return languages


def main():
    """Read records from stdin and print the per-language tally as JSON."""
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(json.dumps(reduce_language_events(sys.stdin)))


if __name__ == '__main__':
    main()