forked from nelhage/iron-blogger
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathscan-feeds.py
executable file
·79 lines (65 loc) · 2 KB
/
scan-feeds.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#!/usr/bin/python
import yaml
import feedparser
import datetime
import sys
import os
from dateutil.parser import parse
import dateutil.tz as tz
import settings
config=settings.load_settings()
with open('bloggers.yml') as f:
users = yaml.safe_load(f.read())
if not os.path.exists('out'):
os.makedirs('out')
try:
with open('out/report.yml') as f:
log = yaml.safe_load(f.read())
except IOError:
log = {}
START = datetime.datetime.strptime(config['start_date'],'%Y/%m/%d')
def parse_published(pub):
try:
return parse(pub).astimezone(tz.tzlocal()).replace(tzinfo=None)
except:
return parse(pub).replace(tzinfo=None)
def get_date(post):
for k in ('published', 'created', 'updated'):
if k in post:
return post[k]
def get_link(post):
return post.link
def parse_feeds(weeks, uri):
feed = feedparser.parse(uri)
print >>sys.stderr, "Parsing: %s" % uri
if not feed.entries:
print >>sys.stderr, "WARN: no entries for ", uri
for post in feed.entries:
date = parse_published(get_date(post))
if date < START:
continue
wn = (date - START).days / 7
while len(weeks) <= wn:
weeks.append([])
if post.has_key('title'):
post = dict(date=date,
title=post.title,
url=get_link(post))
if not post.has_key('title'):
post = dict(date=date,
title="",
url=get_link(post))
if post['url'] not in [p['url'] for p in weeks[wn]]:
weeks[wn].append(post)
if len(sys.argv) > 1:
for username in sys.argv[1:]:
weeks = log.setdefault(username, [])
for l in users[username]['links']:
parse_feeds(weeks, l[2])
else:
for (username, u) in users.items():
weeks = log.setdefault(username, [])
for l in u['links']:
parse_feeds(weeks, l[2])
with open('out/report.yml', 'w') as f:
yaml.safe_dump(log, f)