# iron-blogger/import-feeds.py

#!/usr/bin/python
from lxml import html
import yaml
import urllib2
import urlparse

# Read the blogger roster from disk; the parsed structure is kept in
# ``users`` for the rest of the script.
with open('bloggers.yml') as roster_file:
    roster_text = roster_file.read()
users = yaml.safe_load(roster_text)

def fetch_links(url):
    """Return the href of the most suitable feed advertised by the page at *url*.

    The page is downloaded and parsed as HTML, and its
    ``<link rel="alternate">`` elements whose type mentions rss, atom or rdf
    are collected.  An Atom feed that does not look like a comments feed
    (judged by its href and title, case-insensitively) is preferred;
    failing that, the first advertised feed is returned.

    The href is returned exactly as written in the page, so it may be
    relative to *url*; resolving it is left to the caller.

    Raises:
        IndexError: if the page advertises no feed link carrying an href.
    """
    response = urllib2.urlopen(url)
    try:
        tree = html.fromstring(response.read())
    finally:
        # Release the connection even if reading or parsing fails.
        response.close()

    matches = tree.xpath(
        '//link[@rel="alternate"][contains(@type, "rss") or ' +
        'contains(@type, "atom") or contains(@type, "rdf")]')
    # A <link> without an href cannot point at a feed; dropping it here
    # avoids a KeyError further down.
    links = [l for l in matches if 'href' in l.attrib]
    if not links:
        # Same exception type as the bare ``links[0]`` lookup used to raise,
        # but with a message that says which page was at fault.
        raise IndexError("no feed <link> found at %s" % (url,))

    for link in links:
        # The XPath predicate only matches elements that have a type
        # attribute, so indexing attrib['type'] is safe.
        if ('atom' in link.attrib['type'] and
                'comments' not in link.attrib['href'].lower() and
                'comments' not in link.attrib.get('title', '').lower()):
            return link.attrib['href']
    # No non-comments Atom feed: fall back to the first advertised feed.
    return links[0].attrib['href']

# Walk every blogger's link entries, normalise the title, and append a
# discovered feed URL to each entry that is not already three items long.
for blogger, record in users.items():
    print("[%s]" % blogger)
    for entry in record['links']:
        title, page_url = entry[0:2]
        clean_title = title.strip()
        print(" - %s:" % clean_title)
        entry[0] = clean_title
        if len(entry) != 3:
            feed = fetch_links(page_url)
            # The advertised href may be relative to the page it came from.
            if not feed.startswith('http:'):
                feed = urlparse.urljoin(page_url, feed)
            print("   %s" % (feed,))
            entry.append(feed)

# Write the updated roster back over the original file.
with open('bloggers.yml', 'w') as roster_out:
    yaml.safe_dump(users, stream=roster_out)