Compare commits

...

31 Commits

Author SHA1 Message Date
Thomas Renger 28e032e0cf Git actions and new config file format 2024-03-05 22:51:03 +01:00
Thomas Renger b091d20bf3 Start: load/save data dynamically from git (WIP) 2023-12-28 23:05:28 +01:00
Thomas Renger 512619b214 tYp0 2023-12-28 23:03:06 +01:00
Thomas Renger 3c4eeed4c0 Rename scan-feeds to blogbot 2023-12-28 23:00:34 +01:00
Thomas Renger cbfd679f1c Add very basic docker build 2023-12-28 16:08:01 +01:00
Thomas Renger 5cb8b8b7e0 also parse users without "end:" 2020-02-29 16:37:31 +01:00
Thomas Renger 33a6e74aa8 Skip inactive bloggers
Don't try to parse the feeds of bloggers with "end:" date in the past.
2020-02-29 16:13:13 +01:00
Thomas Renger da35708096 UTF-8 decode participants list 2019-06-02 22:24:58 +02:00
Thomas Renger 6666300c93 Merge branch 'python3' of iron-blogger/iron-blogger into master
Python 2 is no longer able to connect to modern https-Servers.

Also it’s outdated. ;-)
2019-03-23 11:10:23 +00:00
Thomas Renger 8901fc53b3 obsolete 2019-03-11 13:40:58 +01:00
Iron Blogger Bot 9ff394df38 python3 based template engine complained about the email template 2019-03-08 13:45:30 +01:00
Iron Blogger Bot 2709e81679 Convert python source to version 3 2019-03-08 13:43:50 +01:00
Thomas Renger d12e511c1b sorting, special case for exactly 11 articles 2014-04-25 10:51:42 +02:00
Thomas Renger da246e10dc Update for 2014/04/07 2014-04-13 12:24:01 +02:00
Thomas Renger 837c431838 Update for 2014/04/07 2014-04-13 12:18:13 +02:00
Thomas Renger e69b049a8e Update for 2014/04/07 2014-04-13 12:15:18 +02:00
Thomas Renger 6e4e76de04 Update for 2014/04/07 2014-04-13 12:14:36 +02:00
Thomas Renger f298a168d4 limit 10 2014-04-13 00:18:50 +02:00
Thomas Renger 68e60a2488 Update for 2014/04/07 2014-04-13 00:12:34 +02:00
Thomas Renger cbb973d7c8 Update for 2014/04/07 2014-04-13 00:10:34 +02:00
Thomas Renger 8720853638 test 2014-04-12 22:44:54 +02:00
Iron Blogger Bot df564a4806 logging improvements 2013-09-18 12:15:49 +02:00
Iron Blogger Bot 94837624f1 user-agent 2013-09-18 11:49:34 +02:00
Thomas Renger ffb548cb39 resources for doing nothing -- with a database. 2013-09-08 17:56:36 +02:00
Thomas Renger 5176e6b2fb Test 2013-09-08 16:57:20 +02:00
Thomas Renger 572571fe16 Merge branch 'master' of ssh://wazong.de/iron-blogger into java-version 2013-06-13 22:31:25 +02:00
Thomas Renger c8490a96b6 Andere Fehlermeldung. Immerhin. 2013-06-13 22:30:43 +02:00
Iron Blogger Bot ae7aac0115 Bcc reminder 2013-06-08 21:06:49 +02:00
Thomas Renger b5aac92cd5 Add JSF (not working yet) 2013-05-28 18:04:54 +02:00
Thomas Renger 7f20e2ffe6 Add Richfaces 2013-05-26 14:22:03 +02:00
Thomas Renger 6d3b4cae6f new project 2013-05-25 12:06:32 +02:00
14 changed files with 232 additions and 155 deletions

1
.gitignore vendored
View File

@ -2,3 +2,4 @@
*.pyc
settings.cfg
out/
data/

9
Dockerfile Normal file
View File

@ -0,0 +1,9 @@
FROM python:3
WORKDIR /usr/src/app
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt
COPY . .
CMD [ "python", "./blogbot.py" ]

140
blogbot.py Executable file
View File

@ -0,0 +1,140 @@
#!/usr/bin/python3
import yaml
import feedparser
import datetime
import sys
import os
import shutil
import re
from dateutil.parser import parse
import dateutil.tz as tz
import settings
from git import Repo
def parse_published(pub):
    """Parse a feed timestamp string into a naive datetime.

    The string is parsed with ``dateutil.parser.parse``. The result is
    converted to the system's local timezone (``tz.tzlocal()``) when that
    conversion succeeds, and the ``tzinfo`` is then dropped so the value can
    be compared with other naive datetimes.

    Exceptions raised by the parser itself propagate to the caller.
    """
    # Parse once; a parse failure cannot be repaired by the fallback anyway.
    stamp = parse(pub)
    try:
        stamp = stamp.astimezone(tz.tzlocal())
    except Exception:
        # Deliberate best-effort: if the local-time conversion is not
        # possible, keep the parsed wall-clock value as it is.  Unlike a
        # bare ``except:``, this lets KeyboardInterrupt/SystemExit through.
        pass
    return stamp.replace(tzinfo=None)
def get_date(post):
    """Return the first available timestamp of a feed entry.

    The keys 'published', 'created' and 'updated' are tried in that order;
    ``None`` is returned when the entry has none of them.
    """
    candidates = (post[field]
                  for field in ('published', 'created', 'updated')
                  if field in post)
    return next(candidates, None)
def get_link(post):
    """Return the value of the entry's ``link`` attribute."""
    url = post.link
    return url
def get_title(post):
    """Return the entry's title, or an empty string if it has none."""
    return post.title if 'title' in post else ''
def remove_html_tags(txt):
    """Return ``txt`` with every ``<...>`` tag-like span deleted."""
    return re.sub(r'<[^<]*?/?>', '', txt)
def remove_extra_spaces(txt):
    """Collapse each run of whitespace in ``txt`` into a single space."""
    return re.sub(r'\s+', ' ', txt)
def create_extract(txt):
    """Build a short plain-text teaser from an HTML fragment.

    Tags are removed and whitespace is collapsed first. Text shorter than
    250 characters is returned unchanged. Longer text is cut at the last
    suitable break found between positions 200 and 250, trying in order:
    sentence end ('. ', '! ', '? '), comma, plain space. If no break is
    found, the text is cut hard at 250 characters.
    """
    cleaned = remove_extra_spaces(remove_html_tags(txt))
    if len(cleaned) < 250:
        return cleaned

    # Punctuation breaks keep the punctuation mark itself (hence the +1).
    for marker in ('. ', '! ', '? ', ', '):
        cut = cleaned.rfind(marker, 200, 250)
        if cut > 0:
            return cleaned[:cut + 1] + " [...]"

    # A bare space break drops the space.
    cut = cleaned.rfind(' ', 200, 250)
    if cut > 0:
        return cleaned[:cut] + " [...]"

    return cleaned[:250] + "[...]"
def parse_feeds(weeks, username, blog):
    """Fetch one blog's feed and record its recent posts in ``weeks``.

    Parameters:
        weeks: dict mapping a "%Y-%m-%d" day string to a list of post
            dicts; it is updated in place.
        username: stored unchanged in every recorded post.
        blog: description of the blog; ``blog['feed']`` is the feed URI.

    Entries dated before the module-level ``START`` are ignored, and an
    entry whose URL is already recorded for the same day is not added
    again. Entries without a usable date are skipped with a warning on
    stderr instead of aborting the whole run.
    """
    feedparser.USER_AGENT = "IronBloggerBot/0.2 +http://ironblogger.de/"
    uri = blog['feed']
    print("Retreiving ", uri)
    feed = feedparser.parse(uri)
    if not feed.entries:
        print("WARN: no entries for ", uri, file=sys.stderr)
    for entry in feed.entries:
        # get_date() returns None when the entry carries no timestamp field.
        raw_date = get_date(entry)
        if raw_date is None:
            print("WARN: entry without date in ", uri, file=sys.stderr)
            continue
        try:
            date = parse_published(raw_date)
        except (ValueError, OverflowError):
            print("WARN: unparsable date in ", uri, file=sys.stderr)
            continue
        if date < START:
            continue
        key = date.strftime("%Y-%m-%d")
        day_posts = weeks.setdefault(key, [])
        url = get_link(entry)
        if any(known['url'] == url for known in day_posts):
            continue
        day_posts.append(dict(
            date=date,
            title=get_title(entry),
            url=url,
            username=username,
            # NOTE(review): ``blog`` is indexed by the key 'feed' above, so
            # the positional ``blog[0]`` looks like a leftover from an older
            # list-based format -- confirm which key holds the blog name.
            blogname=blog[0],
            # Not every entry has a description; use an empty extract then.
            description=create_extract(entry.get('description', ''))))
# -- main
# Script body: clone the data repository, scan the participants' feeds for
# recent posts, write the report and the blog list back and stage them.
config = settings.load_settings()

# Always work on a fresh clone; a checkout left from an earlier run is removed.
if os.path.exists('data'):
    shutil.rmtree('data')
gitrepo = Repo.clone_from('https://git.wazong.de/iron-blogger/test.git', 'data')

# A missing or empty blogs.yaml means "no participants".  The fallback has to
# be a mapping because the code below uses users.items() and users[username].
try:
    with open('data/blogs.yaml', encoding='utf-8') as f:
        users = yaml.safe_load(f) or {}
except FileNotFoundError:
    users = {}
print(users)

os.makedirs('data/out', exist_ok=True)

# Same treatment for the report: safe_load() yields None for an empty file.
try:
    with open('data/out/report.yaml', encoding='utf-8') as f:
        log = yaml.safe_load(f) or {}
except FileNotFoundError:
    log = {}

# START = datetime.datetime.strptime(config['start_date'],'%Y/%m/%d')
# Only posts from the last seven days (counted from midnight) are considered.
START = datetime.datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) - datetime.timedelta(days=7)

if len(sys.argv) > 1:
    # Usernames given on the command line: scan only those users.
    for username in sys.argv[1:]:
        # NOTE(review): this adds a per-user key to ``log`` although
        # parse_feeds() keys it by day, and it reads 'links' while the
        # branch below reads 'blogs' -- looks like a leftover of the old
        # format; confirm before relying on this branch.
        log.setdefault(username, {})
        for l in users[username]['links']:
            parse_feeds(log, username, l)
else:
    for username, u in users.items():
        # Users whose 'end' date lies in the past no longer take part.
        if 'end' in u:
            enddate = datetime.datetime.strptime(u['end'], '%Y/%m/%d')
            if enddate < datetime.datetime.now():
                print("User inactive: ", username)
                continue
        for l in u['blogs']:
            parse_feeds(log, username, l)

with open('data/out/report.yaml', 'w', encoding='utf-8') as f:
    yaml.safe_dump(log, f)
gitrepo.index.add(['out/report.yaml'])

with open('data/blogs.yaml', 'w', encoding='utf-8') as f:
    yaml.safe_dump(users, f)
gitrepo.index.add(['blogs.yaml'])

print(gitrepo.index.diff(gitrepo.head.commit))
# gitrepo.index.commit('autocommit')
# gitrepo.remotes.origin.push()

View File

@ -1,43 +1,49 @@
#!/usr/bin/python
#!/usr/bin/python3
from lxml import html
import yaml
import sys
import urllib2
import urlparse
import urllib.request
import urllib.parse
with open('bloggers.yml') as f:
users = yaml.safe_load(f.read())
def fetch_links(url):
tree = html.fromstring(urllib2.urlopen(url).read())
links = tree.xpath(
'//link[@rel="alternate"][contains(@type, "rss") or ' +
'contains(@type, "atom") or contains(@type, "rdf")]')
candidates = [l for l in links if
print("Looking for feeds in %s" % (url,), file=sys.stderr)
try:
tree = html.document_fromstring(urllib.request.urlopen(url).read())
links = tree.xpath(
'//link[@rel="alternate"][contains(@type, "rss") or ' +
'contains(@type, "atom") or contains(@type, "rdf")]')
candidates = [l for l in links if
'atom' in l.attrib['type'] and
'comments' not in l.attrib['href'].lower() and
'comments' not in l.attrib.get('title','')]
except:
candidates = []
links = []
if candidates:
return candidates[0].attrib['href']
elif links:
return links[0].attrib['href']
else:
print >>sys.stderr, "No link found for %s" % (url,)
print("No link found for %s" % (url,), file=sys.stderr)
return None
for (name, u) in users.items():
for (name, u) in list(users.items()):
print("Processing user %s" % (name,), file=sys.stderr)
for e in u['links']:
(title, url) = e[1:3]
try:
e[1] = e[1].strip()
except:
except:
e[1] = e[1]
if len(e) == 4:
continue
link = fetch_links(url)
if link:
if not link.startswith('http:'):
link = urlparse.urljoin(url, link)
link = urllib.parse.urljoin(url, link)
e.append(link)
with open('bloggers.yml', 'w') as f:

33
ledger
View File

@ -1,28 +1,11 @@
2012-10-01 Week 0
User:musevg $-5
Pool:Owed:musevg
2012-10-01 Week 0
User:poster4nature $-5
Pool:Owed:poster4nature
2012-10-08 Week 1, Blog 0
User:hirnrinde $-5
Pool:Owed:hirnrinde
2012-10-08 Week 1, Blog 0
User:jantheofel $-5
Pool:Owed:jantheofel
2012-10-08 Week 1, Blog 0
User:poster4nature $-5
Pool:Owed:poster4nature
2012-10-08 Week 1, Blog 0
User:werkstatt $-5
Pool:Owed:werkstatt
2012-10-15 Week 2, Blog b2s
User:musevg $-5
Pool:Owed:musevg
2012-10-15 Schulden bezahlt
Pool:Owed:poster4nature $-10
Pool:Paid

View File

@ -1,4 +1,4 @@
#!/usr/bin/python
#!/usr/bin/python3
# This Python file uses the following encoding: utf-8
import yaml
from dateutil.parser import parse
@ -29,6 +29,7 @@ def get_balance(acct):
def get_debts():
p = subprocess.Popen(['ledger', '-f', os.path.join(HERE, 'ledger'),
'-n', 'balance', 'Pool:Owed:'],
universal_newlines=True,
stdout=subprocess.PIPE)
(out, _) = p.communicate()
debts = []
@ -48,7 +49,7 @@ def parse_skip(rec):
out = []
for s in spec:
if isinstance(s, list):
out.append(map(to_week_num, s))
out.append(list(map(to_week_num, s)))
else:
out.append(to_week_num(s))
return out
@ -72,7 +73,7 @@ def render_template(path, week=None, **kwargs):
else:
week = START
week = (week - START).days / 7
week = int( (week - START).days / 7)
week_start = START + (week * datetime.timedelta(7))
week_end = START + ((week + 1) * datetime.timedelta(7))
@ -84,7 +85,7 @@ def render_template(path, week=None, **kwargs):
class User(object):
pass
for (un, rec) in users.items():
for (un, rec) in list(users.items()):
u = User()
u.username = un
u.name = rec['name']
@ -97,8 +98,8 @@ def render_template(path, week=None, **kwargs):
u.stop = rec.get('stop')
u.skip = parse_skip(rec)
u.posts = report.get(un, {})
u.goodblogs = []
u.lameblogs = []
u.goodblogs = []
u.lameblogs = []
userlist.append(u)
@ -121,13 +122,13 @@ def render_template(path, week=None, **kwargs):
continue
if should_skip(u.skip, week):
skipped_users.append(u)
continue
continue
elif user_start > week_start:
skip.append(u)
continue
for blog in u.links:
b=blog[0]
weeks=u.posts[b]
continue
for blog in u.links:
b=blog[0]
weeks=u.posts[b]
if len(weeks) <= week or not weeks[week]:
u.lameblogs.append(b)
else:
@ -144,10 +145,10 @@ def render_template(path, week=None, **kwargs):
if __name__ == '__main__':
if len(sys.argv) < 2:
print >>sys.stderr, "Usage: %s TEMPLATE [WEEK]"
print("Usage: %s TEMPLATE [WEEK]", file=sys.stderr)
sys.exit(1)
template = sys.argv[1]
week = None
if len(sys.argv) > 2: week = sys.argv[2]
print render_template(template, week)
print(render_template(template, week))

4
requirements.txt Normal file
View File

@ -0,0 +1,4 @@
pyyaml
feedparser
python-dateutil
GitPython

View File

@ -1,81 +0,0 @@
#!/usr/bin/python
import yaml
import feedparser
import datetime
import sys
import os
from dateutil.parser import parse
import dateutil.tz as tz
import settings
config=settings.load_settings()
with open('bloggers.yml') as f:
users = yaml.safe_load(f.read())
if not os.path.exists('out'):
os.makedirs('out')
try:
with open('out/report.yml') as f:
log = yaml.safe_load(f.read())
except IOError:
log = {}
START = datetime.datetime.strptime(config['start_date'],'%Y/%m/%d')
def parse_published(pub):
try:
return parse(pub).astimezone(tz.tzlocal()).replace(tzinfo=None)
except:
return parse(pub).replace(tzinfo=None)
def get_date(post):
for k in ('published', 'created', 'updated'):
if k in post:
return post[k]
def get_link(post):
return post.link
def parse_feeds(weeks, uri):
feed = feedparser.parse(uri)
print >>sys.stderr, "Parsing: %s" % uri
if not feed.entries:
print >>sys.stderr, "WARN: no entries for ", uri
for post in feed.entries:
date = parse_published(get_date(post))
if date < START:
continue
wn = (date - START).days / 7
while len(weeks) <= wn:
weeks.append([])
if post.has_key('title'):
post = dict(date=date,
title=post.title,
url=get_link(post))
if not post.has_key('title'):
post = dict(date=date,
title="",
url=get_link(post))
if post['url'] not in [p['url'] for p in weeks[wn]]:
weeks[wn].append(post)
if len(sys.argv) > 1:
for username in sys.argv[1:]:
blogs = log.setdefault(username, {})
for l in users[username]['links']:
weeks = blogs.setdefault(l[0], [])
parse_feeds(weeks, l[3])
else:
for (username, u) in users.items():
blogs = log.setdefault(username, {})
for l in u['links']:
weeks = blogs.setdefault(l[0], [])
parse_feeds(weeks, l[3])
with open('out/report.yml', 'w') as f:
yaml.safe_dump(log, f)

View File

@ -1,16 +1,17 @@
#!/usr/bin/python
import ConfigParser, os
import configparser, os
def load_settings():
configfile = ConfigParser.ConfigParser()
configfile = configparser.ConfigParser()
configfile.read('settings.cfg')
config=dict()
config['mail']=configfile.get("general","mail")
config['start_date']=configfile.get("general","start_date")
config['report_interval']=configfile.get("general","report_interval", fallback="weekly")
config['username']=configfile.get("blogsettings","username")
config['password']=configfile.get("blogsettings","password")
config['password']=configfile.get("blogsettings","password", fallback="")
config['xmlrpc_endpoint']=configfile.get("blogsettings","xmlrpc_endpoint")
config['blog_id']=configfile.get("blogsettings","blog_id")
config['blog_id']=configfile.get("blogsettings","blog_id", fallback="0")
config['participants_page_id']=configfile.get("blogsettings","participants_page_id")
return config

View File

@ -19,10 +19,16 @@ PUNTED for balance ≥$30: ${", ".join(sorted(punt))}
People who posted:
% for u in sorted(userlist, key=lambda u:u.name[u.name.rfind(' '):].lower()):
% for b in u.goodblogs:
<% pc=0 %>
${u.name} in ${b} (${u.username}):
% for p in u.posts[b][week]:
- ${p['url']}
% if pc < 10:
<% pc+=1 %> - ${p['url']}
% endif
% endfor
% if len(u.posts[b][week]) > 10:
(and ${len(u.posts[b][week])-10} more)
% endif
% endfor
% endfor
@ -47,7 +53,7 @@ Paid: € ${paid}
Events: € ${event}
Individual debts:
% for (u, v) in sorted(debts, key=lambda p:p[1], reverse=True):
${u"%20s %d \u20AC" % (u, v)}
${"%20s %d \u20AC" % (u, v)}
% endfor
PREVIOUSLY PUNTED (pay € 30 balance to return):

View File

@ -2,7 +2,8 @@
From: ${mail}
Content-Type: text/plain; charset=utf-8
Subject: Iron Blogger Erinnerung: noch kein Beitrag in der Woche ab ${week_start.strftime("%d.%m.%Y")}:
To: \
To: ${mail}
Bcc: \
% for u in sorted(userlist, key=lambda u:u.name[u.name.rfind(' '):].lower()):
% if len(u.lameblogs)>0:
${u.mail}, \

View File

@ -14,10 +14,15 @@ Zusammenfassung der Woche ab ${week_start.strftime("%d.%m.%Y")}
% endif
:</span></dt>
<dd>
<ul>
% for p in u.posts[g][week]:
<li><a href="${p['url']}">${p['title'] or "[ohne Titel]"}</a></li>
<% pc=0 %><ul>
% for p in sorted(u.posts[g][week], key = lambda p:p['date']):
% if pc < 10 or len(u.posts[g][week]) == 11:
<% pc+=1 %><li><a href="${p['url']}">${p['title'] or "[ohne Titel]"}</a></li>
%endif
% endfor
% if len(u.posts[g][week]) > 11:
<li>und <a href="${b[2]}">${len(u.posts[g][week])-10} weitere Artikel</a>...</li>
% endif
</ul>
</dd>
% endif

View File

@ -1,18 +1,18 @@
#!/usr/bin/python
#!/usr/bin/python3
import render
import os
import sys
import xmlrpclib
import xmlrpc.client
import subprocess
import settings
config=settings.load_settings()
x = xmlrpclib.ServerProxy(config['xmlrpc_endpoint'])
x = xmlrpc.client.ServerProxy(config['xmlrpc_endpoint'])
page = x.wp.getPage(config['blog_id'], config['participants_page_id'], config['username'], config['password'])
text = render.render_template('templates/users.tmpl')
text = render.render_template('templates/users.tmpl').decode("utf-8")
page['description'] = text
x.wp.editPage(config['blog_id'], config['participants_page_id'], config['username'], config['password'],page,True)

View File

@ -1,9 +1,9 @@
#!/usr/bin/python
#!/usr/bin/python3
# This Python file uses the following encoding: utf-8
import render
import os
import sys
import xmlrpclib
import xmlrpc.client
import subprocess
import datetime
import yaml
@ -22,7 +22,7 @@ if len(args)>0:
if args[0] == '-q':
dry_run = True
quick_view = True
send_mail = False
send_mail = False
args = args[1:]
if args[0] == '-r':
@ -32,14 +32,15 @@ if len(args)>0:
if args[0] == '-n':
dry_run = True
send_mail = False
send_mail = False
args = args[1:]
date = args[0]
with open('ledger', 'a') as f:
f.write("\n")
f.write(render.render_template('templates/ledger', date))
# print(render.render_template('templates/ledger', date).decode("utf-8"))
f.write(render.render_template('templates/ledger', date).decode("utf-8"))
if not dry_run:
subprocess.check_call(["git", "commit", "ledger",
@ -61,7 +62,7 @@ with open('ledger', 'a') as f:
if not dry_run:
text = render.render_template('templates/week.tmpl', date, punt=punt)
text = render.render_template('templates/week.tmpl', date, punt=punt).decode("utf-8")
lines = text.split("\n")
title = lines[0]
@ -69,16 +70,16 @@ if not dry_run:
page = dict(title = title, description = body)
x = xmlrpclib.ServerProxy(config['xmlrpc_endpoint'])
x = xmlrpc.client.ServerProxy(config['xmlrpc_endpoint'])
x.metaWeblog.newPost(config['blog_id'], config['username'], config['password'], page, True)
if not reminder:
email = render.render_template('templates/email.txt', date, punt=punt,mail=config['mail'])
else:
email = render.render_template('templates/reminder.txt', date, punt=punt,mail=config['mail'])
if quick_view:
print(render.render_template('templates/quick_view.tmpl',date,punt=punt))
print((render.render_template('templates/quick_view.tmpl',date,punt=punt)))
if dry_run and not quick_view:
print email
print(email)
if send_mail:
# p = subprocess.Popen(['mutt', '-H', '/dev/stdin'],
p = subprocess.Popen(['/usr/sbin/sendmail', '-oi', '-t'],