import-feeds: Handle blogs without <link> tags.
This commit is contained in:
parent
a7da50b1ff
commit
8ae4ec8890
|
@ -1,6 +1,7 @@
|
||||||
#!/usr/bin/python
|
#!/usr/bin/python
|
||||||
from lxml import html
|
from lxml import html
|
||||||
import yaml
|
import yaml
|
||||||
|
import sys
|
||||||
import urllib2
|
import urllib2
|
||||||
import urlparse
|
import urlparse
|
||||||
|
|
||||||
|
@ -18,7 +19,11 @@ def fetch_links(url):
|
||||||
'comments' not in l.attrib.get('title','')]
|
'comments' not in l.attrib.get('title','')]
|
||||||
if candidates:
|
if candidates:
|
||||||
return candidates[0].attrib['href']
|
return candidates[0].attrib['href']
|
||||||
return links[0].attrib['href']
|
elif links:
|
||||||
|
return links[0].attrib['href']
|
||||||
|
else:
|
||||||
|
print >>sys.stderr, "No link found for %s" % (url,)
|
||||||
|
return None
|
||||||
|
|
||||||
for (name, u) in users.items():
|
for (name, u) in users.items():
|
||||||
for e in u['links']:
|
for e in u['links']:
|
||||||
|
@ -27,9 +32,10 @@ for (name, u) in users.items():
|
||||||
if len(e) == 3:
|
if len(e) == 3:
|
||||||
continue
|
continue
|
||||||
link = fetch_links(url)
|
link = fetch_links(url)
|
||||||
if not link.startswith('http:'):
|
if link:
|
||||||
link = urlparse.urljoin(url, link)
|
if not link.startswith('http:'):
|
||||||
e.append(link)
|
link = urlparse.urljoin(url, link)
|
||||||
|
e.append(link)
|
||||||
|
|
||||||
with open('bloggers.yml', 'w') as f:
|
with open('bloggers.yml', 'w') as f:
|
||||||
yaml.safe_dump(users, f)
|
yaml.safe_dump(users, f)
|
||||||
|
|
Loading…
Reference in New Issue