-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathretrieve_hn_user_data.py
38 lines (32 loc) · 1.3 KB
/
retrieve_hn_user_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
from BeautifulSoup import BeautifulSoup
import urllib2
from datetime import date, timedelta
import re
def retrieve_hn_user_data(hn_username):
"""Retrieve information contained on HN user's HN profile.
Takes a username.
Returns dict with keys: ["user", "created", "karma", "about"]
"""
hn_profile_url = "http://news.ycombinator.com/user?id=%s" % hn_username
page = urllib2.urlopen(hn_profile_url).read()
soup = BeautifulSoup(page)
fields = soup.findAll("td", attrs={'valign': 'top'})
user_data = dict()
for f in fields:
field_key = f.string.replace(":", "")
contents = f.findNextSibling().contents
user_data[field_key] = " ".join(str(c) for c in contents)
expected_keys = ["user", "created", "karma", "about"]
e = "Expected fields not in HN profile. Probably invalid username."
if not all(map(lambda x: x in user_data.keys(), expected_keys)):
raise Exception, e
# convert the "created X days ago" to a python datetime
if not "day" in user_data["created"]:
days = 0
else:
days = int(re.match("\d+", user_data["created"]).group())
user_data["created"] = date.today() - timedelta(days=days)
return user_data
if __name__ == "__main__":
loser = "ezl"
user_data = retrieve_hn_user_data(loser)