#!/usr/bin/python
import yaml
import feedparser
import datetime
from dateutil.parser import parse
import dateutil.tz as tz
# Load the roster of participating bloggers from the config file.
with open('bloggers.yml') as f:
    users = yaml.safe_load(f)

# username -> list of weekly post buckets, filled in by the scan below.
log = {}

# Start of the first contest week (naive local time).
START = datetime.datetime(2009, 12, 21, 6)
def parse_published(pub):
    """Parse a feed timestamp string into a naive local-time datetime.

    The result is converted to the machine's local zone and stripped of
    tzinfo so it can be compared directly against the naive START.
    """
    stamp = parse(pub)
    local = stamp.astimezone(tz.tzlocal())
    return local.replace(tzinfo=None)
def get_date(post):
    """Return the timestamp string of a feed entry.

    Prefers the entry's 'published' field and falls back to 'updated'
    when no publication time is present.
    """
    return post.published if 'published' in post else post.updated
def get_link(post):
if 'links' in post:
links = dict((l.rel, l) for l in post.links if 'html' in l.type)
if 'self' in links:
return links['self'].href
elif 'alternate' in links:
return links['alternate'].href
2009-12-28 23:09:11 +00:00
if 'href' in post:
return post.href
if 'link' in post:
return post.link
return None
2009-12-28 23:09:11 +00:00
def parse_feeds(weeks, uri):
    """Fetch the feed at `uri` and bucket its entries by contest week.

    Mutates `weeks` in place: weeks[n] collects dicts (date/title/url)
    for posts made during the n-th week after START.  Entries dated
    before START are skipped.
    """
    feed = feedparser.parse(uri)
    for post in feed.entries:
        date = parse_published(get_date(post))
        if date < START:
            continue
        # '//' (floor division) instead of '/': identical for ints in
        # Python 2, but keeps wn an int — and thus a valid list index —
        # under 'from __future__ import division' or Python 3.
        wn = (date - START).days // 7
        # Grow the list until the target week's bucket exists.
        while len(weeks) <= wn:
            weeks.append([])
        weeks[wn].append(dict(
            date=date,
            title=post.title,
            url=get_link(post)))
for (username, u) in users.items():
weeks = []
print "[%s]" % (username)
for l in u['links']:
parse_feeds(weeks, l[2])
log[username] = weeks
for (i, w) in enumerate(weeks):
print " [%d]: %s" % (i, w)
2009-12-28 23:24:22 +00:00
# Persist the per-user weekly log for later report generation.
with open('out/report.yml', 'w') as report:
    yaml.safe_dump(log, report)