2009-12-29 18:37:40 +00:00
|
|
|
#!/usr/bin/python
|
2009-12-28 23:09:11 +00:00
|
|
|
import yaml
|
|
|
|
import feedparser
|
|
|
|
import datetime
|
2010-01-18 03:45:33 +00:00
|
|
|
import sys
|
2012-01-19 19:03:49 +00:00
|
|
|
import os
|
2009-12-28 23:09:11 +00:00
|
|
|
from dateutil.parser import parse
|
|
|
|
import dateutil.tz as tz
|
|
|
|
|
|
|
|
# Blogger registry, keyed by username. Each record is later read for its
# 'links' list when feeds are fetched.
with open('bloggers.yml') as config_file:
    users = yaml.safe_load(config_file)
|
|
|
|
|
2012-01-19 19:03:49 +00:00
|
|
|
# Reports live under ./out; create the directory on first run.
if not os.path.exists('out'):
    os.makedirs('out')

# Resume from the previous run's report so earlier weeks are kept.
# `log` maps username -> list of weeks, each week a list of post dicts.
try:
    with open('out/report.yml') as f:
        # safe_load returns None for an empty file; fall back to an empty
        # mapping so the later log.setdefault(...) calls keep working.
        log = yaml.safe_load(f) or {}
except IOError:
    # No previous report (or it is unreadable): start from scratch.
    log = {}
|
2009-12-28 23:09:11 +00:00
|
|
|
|
2012-01-19 18:32:24 +00:00
|
|
|
# Midnight at the start of week 0 (naive datetime). Posts are compared
# against this value and bucketed into 7-day weeks counted from it.
START = datetime.datetime(year=2011, month=12, day=25)
|
2009-12-28 23:09:11 +00:00
|
|
|
|
|
|
|
def parse_published(pub):
    """Parse a feed date string into a naive datetime.

    Timezone-aware values are converted to the local timezone before the
    tzinfo is dropped; values without timezone information are returned
    as parsed.

    Args:
        pub: date/time string taken from a feed entry.

    Returns:
        A ``datetime.datetime`` with ``tzinfo`` set to ``None``.

    Raises:
        Whatever ``dateutil.parser.parse`` raises for unparseable input.
    """
    # Parse once; the original parsed a second time inside its fallback.
    parsed = parse(pub)

    if parsed.tzinfo is None:
        # Nothing to convert: the value is already naive.
        return parsed

    try:
        return parsed.astimezone(tz.tzlocal()).replace(tzinfo=None)
    except (ValueError, OverflowError):
        # Best-effort fallback kept from the original: if the conversion
        # to local time fails, just drop the timezone.
        return parsed.replace(tzinfo=None)
|
2009-12-28 23:09:11 +00:00
|
|
|
def get_date(post):
    """Return the first usable date value of a feed entry.

    The keys 'published', 'created' and 'updated' are tried in that order.
    A key that is present but holds an empty value is skipped so that a
    later key can still supply the date.

    Args:
        post: mapping-like feed entry (supports ``in`` and ``[]``).

    Returns:
        The stored date value, or ``None`` when none of the keys holds one.
    """
    for key in ('published', 'created', 'updated'):
        if key in post and post[key]:
            return post[key]
    return None
|
2009-12-28 23:09:11 +00:00
|
|
|
|
|
|
|
def get_link(post):
    """Return the URL of a feed entry, read from its ``link`` attribute."""
    url = post.link
    return url
|
2009-12-28 23:09:11 +00:00
|
|
|
|
|
|
|
def parse_feeds(weeks, uri):
    """Fetch the feed at *uri* and file its posts into *weeks*.

    Args:
        weeks: list of weeks, mutated in place. ``weeks[n]`` is the list of
            post dicts (keys 'date', 'title', 'url') dated within the n-th
            7-day period after ``START``. The list is extended with empty
            weeks as needed.
        uri: feed location handed to ``feedparser.parse``.

    Entries dated before ``START`` are ignored, as are entries whose URL is
    already recorded in the same week. Entries without any date are skipped
    with a warning instead of aborting the run.
    """
    feed = feedparser.parse(uri)

    # sys.stderr.write behaves the same on Python 2 and 3, unlike the
    # ``print >>sys.stderr`` statement form.
    sys.stderr.write("Parsing: %s\n" % uri)

    if not feed.entries:
        sys.stderr.write("WARN: no entries for  %s\n" % uri)

    for entry in feed.entries:
        published = get_date(entry)
        if not published:
            # No date means the entry cannot be assigned to a week.
            sys.stderr.write("WARN: no date for an entry in %s\n" % uri)
            continue

        date = parse_published(published)
        if date < START:
            continue

        # Floor division keeps the week number an int under true division
        # as well, so it stays usable as a list index.
        wn = (date - START).days // 7

        # Pad with empty weeks so that weeks[wn] exists.
        while len(weeks) <= wn:
            weeks.append([])

        url = get_link(entry)
        if any(seen['url'] == url for seen in weeks[wn]):
            # Already recorded (e.g. by an earlier run or another feed).
            continue

        weeks[wn].append(dict(date=date,
                              title=entry.title,
                              url=url))
|
2009-12-28 23:09:11 +00:00
|
|
|
|
2010-05-24 19:21:03 +00:00
|
|
|
# Usernames given on the command line restrict the run to those bloggers;
# with no arguments every blogger in the registry is processed.
usernames = sys.argv[1:] or list(users)

# Fail with a readable message rather than a bare KeyError traceback.
unknown = [name for name in usernames if name not in users]
if unknown:
    sys.exit("Unknown user(s): %s" % ", ".join(unknown))

for username in usernames:
    weeks = log.setdefault(username, [])
    for link in users[username]['links']:
        # NOTE(review): element 2 of each link entry is passed on as the
        # feed URI -- confirm against the layout used in bloggers.yml.
        parse_feeds(weeks, link[2])
|
2009-12-28 23:09:11 +00:00
|
|
|
|
2009-12-28 23:24:22 +00:00
|
|
|
# Persist the merged log so the next run can pick up where this one ended.
with open('out/report.yml', 'w') as report_file:
    yaml.safe_dump(log, stream=report_file)
|