51 lines
1.5 KiB
Python
Executable file
51 lines
1.5 KiB
Python
Executable file
#!/usr/bin/python3
|
|
from lxml import html
|
|
import yaml
|
|
import sys
|
|
import urllib.request
|
|
import urllib.parse
|
|
|
|
# Load the blogger registry that the rest of the script reads and updates.
# YAML text is UTF-8, so decode it explicitly instead of depending on the
# locale's default encoding.  safe_load accepts the open stream directly.
with open('bloggers.yml', encoding='utf-8') as f:
    users = yaml.safe_load(f)
|
|
|
|
def fetch_links(url, timeout=30):
    """Return the href of the first feed link advertised by the page at *url*.

    The page is downloaded and parsed with lxml, and its
    ``<link rel="alternate">`` elements whose ``type`` mentions rss, atom or
    rdf are collected.  Atom links whose href and title do not mention
    "comments" are preferred; otherwise the first collected link is used.

    Args:
        url: Address of the page to inspect.
        timeout: Seconds to wait on each blocking network operation.

    Returns:
        The ``href`` attribute of the chosen link exactly as written in the
        page (it may be relative), or ``None`` when the page cannot be
        fetched or parsed, or advertises no usable feed link.
    """
    print("Looking for feeds in %s" % (url,), file=sys.stderr)
    try:
        # The context manager closes the HTTP response even if read() fails.
        with urllib.request.urlopen(url, timeout=timeout) as response:
            page = response.read()
        tree = html.document_fromstring(page)
        found = tree.xpath(
            '//link[@rel="alternate"][contains(@type, "rss") or ' +
            'contains(@type, "atom") or contains(@type, "rdf")]')
    except Exception as exc:
        # Best effort: one unreachable or malformed page must not abort the
        # whole run, but Ctrl-C / SystemExit still propagate and the reason
        # for the failure is reported.
        print("Could not inspect %s: %s" % (url, exc), file=sys.stderr)
        found = []

    # A <link> without a usable href cannot be followed; dropping it here
    # keeps one malformed tag from discarding the valid ones.
    links = [link for link in found if link.attrib.get('href')]

    # Prefer the main Atom feed over per-post comment feeds.  Both href and
    # title are compared case-insensitively.
    candidates = [
        link for link in links
        if 'atom' in link.attrib.get('type', '')
        and 'comments' not in link.attrib['href'].lower()
        and 'comments' not in link.attrib.get('title', '').lower()
    ]

    if candidates:
        return candidates[0].attrib['href']
    if links:
        return links[0].attrib['href']
    print("No link found for %s" % (url,), file=sys.stderr)
    return None
|
|
|
|
# Walk every user's link entries and append a discovered feed URL to each
# entry that does not have one yet.  Entries are mutated in place, so the
# `users` structure carries the results afterwards.
for name, u in users.items():
    print("Processing user %s" % (name,), file=sys.stderr)
    for e in u['links']:
        # e[1] is tidied with strip() and e[2] is the page URL to inspect.
        url = e[2]
        # Only strings can be stripped; any other value is left untouched.
        if isinstance(e[1], str):
            e[1] = e[1].strip()
        # A fourth element means the feed URL was already recorded.
        if len(e) == 4:
            continue
        link = fetch_links(url)
        if link:
            # Resolve relative hrefs against the page they were found on.
            if not link.startswith('http:'):
                link = urllib.parse.urljoin(url, link)
            e.append(link)
|
|
|
|
# Serialize before opening the file: mode 'w' truncates bloggers.yml
# immediately, so a failure inside the dump would otherwise leave the
# registry empty or half-written.
serialized = yaml.safe_dump(users)
with open('bloggers.yml', 'w') as f:
    f.write(serialized)
|