iron-blogger/import-feeds.py

51 lines
1.5 KiB
Python
Executable File

#!/usr/bin/python3
from lxml import html
import yaml
import sys
import urllib.request
import urllib.parse
# Load the blogger registry that the rest of the script updates in place.
# The encoding is pinned to UTF-8 so that parsing does not depend on the
# locale of the machine running the script; safe_load reads straight from
# the open file, so no intermediate f.read() is needed.
with open('bloggers.yml', encoding='utf-8') as f:
    users = yaml.safe_load(f)
def fetch_links(url, timeout=30):
    """Return the feed URL advertised by the HTML page at *url*, or None.

    The page is downloaded and searched for ``<link rel="alternate">``
    elements that carry an ``href`` and whose ``type`` mentions rss, atom
    or rdf.  An Atom link that does not look like a comments feed is
    preferred; otherwise the first advertised feed is used.

    Args:
        url: Address of the HTML page to inspect.
        timeout: Seconds to wait on the network before giving up.

    Returns:
        The ``href`` of the chosen ``<link>`` exactly as written in the page
        (it may be relative), or ``None`` when the page cannot be fetched or
        parsed, or when it advertises no feed.
    """
    print("Looking for feeds in %s" % (url,), file=sys.stderr)
    try:
        # The context manager closes the response even if read() fails, and
        # the timeout keeps one unresponsive server from stalling the run.
        with urllib.request.urlopen(url, timeout=timeout) as response:
            page = response.read()
        tree = html.document_fromstring(page)
        # Requiring @href here guarantees the attribute lookups below cannot
        # fail, so one malformed <link> no longer discards the whole page.
        links = tree.xpath(
            '//link[@rel="alternate"][@href][contains(@type, "rss") or '
            'contains(@type, "atom") or contains(@type, "rdf")]')
    except Exception as err:
        # Best-effort lookup: report why the page was unusable and carry on.
        # Unlike a bare except, this lets KeyboardInterrupt and SystemExit
        # propagate.
        print("Could not read %s: %s" % (url, err), file=sys.stderr)
        links = []

    # Prefer Atom feeds that are not comment feeds; both the href and the
    # title are compared case-insensitively.
    candidates = [
        link for link in links
        if 'atom' in link.attrib['type']
        and 'comments' not in link.attrib['href'].lower()
        and 'comments' not in link.attrib.get('title', '').lower()
    ]
    if candidates:
        return candidates[0].attrib['href']
    if links:
        return links[0].attrib['href']
    print("No link found for %s" % (url,), file=sys.stderr)
    return None
# Discover and record a feed URL for every blog entry that lacks one.
# Each entry is a list: index 1 is the title, index 2 is the page URL, and
# a fourth element, when present, is the feed URL appended by an earlier
# run.  The first element is not used by this script.
for name, u in users.items():
    print("Processing user %s" % (name,), file=sys.stderr)
    for e in u['links']:
        if len(e) < 3:
            # A truncated entry has no page URL to inspect; skip it instead
            # of aborting the run before any results are written back.
            print("Skipping malformed entry %r for user %s" % (e, name),
                  file=sys.stderr)
            continue
        # Tidy the title in place; non-string titles are left untouched.
        if isinstance(e[1], str):
            e[1] = e[1].strip()
        if len(e) == 4:
            # Feed URL already recorded.
            continue
        url = e[2]
        link = fetch_links(url)
        if link:
            # Feed hrefs may be relative; resolve them against the page URL.
            # Absolute http and https links are kept as found.
            if not link.startswith(('http:', 'https:')):
                link = urllib.parse.urljoin(url, link)
            e.append(link)
# Persist the updated registry.  The YAML text is produced before the file
# is opened for writing, because opening with 'w' truncates it immediately:
# a serialization failure must not leave bloggers.yml empty or half-written.
serialized = yaml.safe_dump(users)
with open('bloggers.yml', 'w', encoding='utf-8') as f:
    f.write(serialized)