sl-feeds/bin/changelog_http_poll.py

195 lines
5.2 KiB
Python

#!/usr/bin/env python
# Mon Oct 17 08:25:29 PDT 2011
# copyright 2011 Vincent Batts, Vienna, VA, USA
# switching from an inotify watcher, to an http poll
# since what lands on connie.slackware.com usually doesn't go public
# immediately
import os
import sys
import glob
import time
from datetime import datetime
from datetime import timedelta
from time import mktime
import urllib2
import anydbm
DEFAULT_DB = os.path.join(os.getenv('HOME'), '.slackware_changelog.db')
DEFAULT_URL = "http://slackware.osuosl.org/"
SLACKWARE_DIR_PATH = "/mirrors/ftp.slackware.com/pub/slackware"
RSS_DIR_PATH = "/home/vbatts/public_html/feeds"
'''
slackware-12.2_ChangeLog.rss
/home/vbatts/public_html/feeds/slackware-10.1_patches_ChangeLog.rss
/home/vbatts/public_html/feeds/slackware-8.1_patches_ChangeLog.rss
>>> for i in c.slackware_versions(): print i
...
/mirrors/ftp.slackware.com/pub/slackware/slackware64-13.0/ChangeLog.txt
/mirrors/ftp.slackware.com/pub/slackware/slackware-8.1/ChangeLog.txt
/mirrors/ftp.slackware.com/pub/slackware/slackware64-13.37/ChangeLog.txt
/mirrors/ftp.slackware.com/pub/slackware/slackware-13.0/ChangeLog.txt
/mirrors/ftp.slackware.com/pub/sla
'''
def rss_files():
for item in glob.glob(RSS_DIR_PATH + "/*.rss"):
yield item
def rss_files_format(str):
if str.startswith(RSS_DIR_PATH + "/"):
str = str[len(RSS_DIR_PATH + "/"):]
if str.endswith(".rss"):
str = str[:-4]
str = str + '.txt'
return str.replace('_','/')
def rss_files_cleaned():
for i in rss_files():
yield rss_files_format(i)
def slackware_versions():
changes = glob.glob(SLACKWARE_DIR_PATH + "/*/ChangeLog.txt")
patches = glob.glob(SLACKWARE_DIR_PATH + "/*/patches/ChangeLog.txt")
for item in changes + patches:
yield item
def slackware_versions_format(str):
if str.startswith(SLACKWARE_DIR_PATH + "/"):
str = str[len(SLACKWARE_DIR_PATH + "/"):]
if str.endswith("/"):
str = str[:-1]
if str.startswith("/"):
str = str[1:]
if str.endswith(".txt"):
str = str[:-4]
return str.replace('/','_')
def slackware_versions_strip():
for i in slackware_versions():
yield i[len(SLACKWARE_DIR_PATH + "/"):]
def slackware_versions_rss():
for i in slackware_versions():
yield slackware_versions_format(i)
def process_changelog_rss(pathname):
if os.path.basename(pathname) == "ChangeLog.txt":
print "%f: proccessing %s" % (time.time(), pathname)
# XXX REPLACE ME!!
cmd = "/home/vbatts/opt/bin/ruby /home/vbatts/bin/gen_changlog_rss.rb %s" % pathname
print cmd
print os.system(cmd)
else:
print '[WARN] "%s" is not a ChangeLog.txt file' % pathname
def db_setup(name = DEFAULT_DB):
try:
return anydbm.open(name, 'c')
except:
return None
def db_teardown(db):
try:
return db.close()
except:
return None
def db_add_ts(db, key, val):
if type(val) == float:
db[key] = str(val)
if type(val) == datetime:
db[key] = str(unix_time(val))
return db[key]
def db_get_ts(db, key):
try:
return datetime.fromtimestamp(float(db[key]))
except KeyError:
return None
def unix_time(dt):
return mktime(dt.timetuple())+1e-6*dt.microsecond
def time_from_header(str):
return datetime.strptime(str, "%a, %d %b %Y %H:%M:%S %Z")
def get_remote_header(url, header):
try:
req = urllib2.Request(url)
resp = urllib2.urlopen(req)
return resp.headers.getheader(header)
except:
return None
def get_remote_time_str(url):
return get_remote_header(url,"last-modified")
def get_remote_time(url):
time_str = get_remote_time_str(url)
if time_str:
return time_from_header(time_str)
else:
return None
def get_local_time(path):
try:
time_flt = os.stat(path).st_mtime
return datetime.fromtimestamp(time_flt)
except:
return None
def main(args):
try:
db = db_setup()
if db == None:
print "ERROR: could not setup database at %s" % DEFAULT_DB
return 1
for i in slackware_versions_strip():
# i'm not going to worry about this file, right now
if i == 'slackware/ChangeLog.txt':
continue
rss_file_name = os.path.join(RSS_DIR_PATH,
slackware_versions_format(i) + ".rss")
rss_ts = get_local_time(rss_file_name)
curr_ts = get_local_time(os.path.join(SLACKWARE_DIR_PATH, i))
prev_ts = db_get_ts( db, "local_" + i)
# Go no further for this file
if curr_ts == prev_ts and os.path.exists(rss_file_name) and rss_ts > prev_ts:
print '[INFO] Local time of "%s" is same as the database has' % i
continue
db_add_ts( db, "local_" + i, curr_ts)
remote_ts = get_remote_time(DEFAULT_URL + i)
print '[INFO] inserting remote_%s: %s' % (i,remote_ts)
db_add_ts( db, "remote_" + i, remote_ts)
if prev_ts == None or (remote_ts - prev_ts) == timedelta(hours=7):
print '[INFO] local and remote ChangeLog times match'
if rss_ts == None:
print '[INFO] RSS file (%s) does not exist' % (rss_ts)
print '[INFO] Processing "%s"' % rss_file_name
process_changelog_rss(os.path.join(SLACKWARE_DIR_PATH, i))
elif prev_ts == None or rss_ts < prev_ts:
print '[INFO] RSS file (%s) is older than the ChangeLog (%s)' % (rss_ts, prev_ts)
print '[INFO] Processing "%s"' % rss_file_name
process_changelog_rss(os.path.join(SLACKWARE_DIR_PATH, i))
else:
print '[INFO] RSS seems current'
finally:
try:
os.wait()
except:
pass
db_teardown(db)
if __name__ == "__main__": sys.exit(main(sys.argv[1:]))