commit f5222933987e9eb48d5ecb4f732bd23922cbf835 Author: Vincent Batts Date: Wed Jan 25 12:24:20 2017 -0800 adding this for history sake It has not been updated since 2011 and should get a rewrite and simplification... diff --git a/README b/README new file mode 100644 index 0000000..5aa3a09 --- /dev/null +++ b/README @@ -0,0 +1,15 @@ +hack scripts + +They accomplish going from ChangeLog.txt -> RSS feeds that folks can subscribe to. +Ultimately ending up at http://www.slackware.com/~vbatts/feeds/ + +These are a mess, but still work. + +I wrote a utility called slack-utils (https://github.com/vbatts/slack-utils/) +with a ruby gem (https://rubygems.org/gems/slack-utils). The python script +(`./bin/changelog_http_poll.py`) is called by a crontab. Python walks the +changelog.txt, then fetches the modified time from the http mirror. If the http +mirror has the new version, it then runs the ruby script (`./bin/gen_changlog_rss.rb`). +This is what parses the changelog and returns RSS, which is then written to the +corresponding file for public consumption. 
+ diff --git a/bin/changelog_alphageek.rb b/bin/changelog_alphageek.rb new file mode 100755 index 0000000..458a6d5 --- /dev/null +++ b/bin/changelog_alphageek.rb @@ -0,0 +1,62 @@ +#!/home/vbatts/opt/bin/ruby + +#require 'fileutils' +require 'logger' +require 'tempfile' +require 'stringio' + +require 'rubygems' +require 'slackware' +require 'slackware/changelog/rss' + +#include FileUtils + +$LOG = Logger.new(STDERR) +$LOG.level = Logger::WARN + +FEEDS_BASE_DIR = "/home/vbatts/public_html/feeds/" +#url = 'http://alphageek.dyndns.org/linux/slackware-packages/slack-13.1/ChangeLog.txt' +# Sun Feb 13 08:44:35 PST 2011 +# new url +URL = 'http://alphageek.dyndns.org/mirrors/alphageek/slackware-%s/ChangeLog.txt' + +VERSIONS = %w{ 14.0 14.1 } + +def url(ver) + URL % ver +end + +if ARGV.include?('-v') + $LOG.level = Logger::DEBUG +end + +VERSIONS.each {|ver| + begin + #tmp_file = File.open("/tmp/vbatts/alpha_log-#{(rand*1000).to_i}.xxx", "w+") + tmp_file = Tempfile.new("alpha_log") + $LOG.debug('tmp_file') { tmp_file } + + strio = StringIO.new() + $LOG.debug('created ') { strio } + + buffer = `lynx -source #{url(ver)}` + $LOG.debug('buffer length') { buffer.length } + + tmp_file.write(buffer) + tmp_file.flush + + changelog = Slackware::ChangeLog.new(tmp_file.path) + changelog.parse + strio.write(changelog.to_rss( + :noimage => true, + :title => "alphageek's #{ver} ChangeLog", + :url => url(ver))) + ensure + strio.seek(0) + tmp_file.close + end + feed_file = File.open(FEEDS_BASE_DIR + "alphageek-#{ver}_ChangeLog.rss", "w+") + $LOG.debug('feed_file') { feed_file } + feed_file.write(strio.read()) + feed_file.close +} diff --git a/bin/changelog_http_poll.py b/bin/changelog_http_poll.py new file mode 100644 index 0000000..998f6aa --- /dev/null +++ b/bin/changelog_http_poll.py @@ -0,0 +1,194 @@ +#!/usr/bin/env python +# Mon Oct 17 08:25:29 PDT 2011 +# copyright 2011 Vincent Batts, Vienna, VA, USA + +# switching from an inotify watcher, to an http poll +# since what lands on 
connie.slackware.com usually doesn't go public +# immediately + + +import os +import sys +import glob +import time +from datetime import datetime +from datetime import timedelta +from time import mktime +import urllib2 +import anydbm + +DEFAULT_DB = os.path.join(os.getenv('HOME'), '.slackware_changelog.db') +DEFAULT_URL = "http://slackware.osuosl.org/" +SLACKWARE_DIR_PATH = "/mirrors/ftp.slackware.com/pub/slackware" +RSS_DIR_PATH = "/home/vbatts/public_html/feeds" + +''' +slackware-12.2_ChangeLog.rss +/home/vbatts/public_html/feeds/slackware-10.1_patches_ChangeLog.rss +/home/vbatts/public_html/feeds/slackware-8.1_patches_ChangeLog.rss +>>> for i in c.slackware_versions(): print i +... +/mirrors/ftp.slackware.com/pub/slackware/slackware64-13.0/ChangeLog.txt +/mirrors/ftp.slackware.com/pub/slackware/slackware-8.1/ChangeLog.txt +/mirrors/ftp.slackware.com/pub/slackware/slackware64-13.37/ChangeLog.txt +/mirrors/ftp.slackware.com/pub/slackware/slackware-13.0/ChangeLog.txt +/mirrors/ftp.slackware.com/pub/sla +''' + +def rss_files(): + for item in glob.glob(RSS_DIR_PATH + "/*.rss"): + yield item + +def rss_files_format(str): + if str.startswith(RSS_DIR_PATH + "/"): + str = str[len(RSS_DIR_PATH + "/"):] + if str.endswith(".rss"): + str = str[:-4] + str = str + '.txt' + return str.replace('_','/') + +def rss_files_cleaned(): + for i in rss_files(): + yield rss_files_format(i) + +def slackware_versions(): + changes = glob.glob(SLACKWARE_DIR_PATH + "/*/ChangeLog.txt") + patches = glob.glob(SLACKWARE_DIR_PATH + "/*/patches/ChangeLog.txt") + for item in changes + patches: + yield item + +def slackware_versions_format(str): + if str.startswith(SLACKWARE_DIR_PATH + "/"): + str = str[len(SLACKWARE_DIR_PATH + "/"):] + if str.endswith("/"): + str = str[:-1] + if str.startswith("/"): + str = str[1:] + if str.endswith(".txt"): + str = str[:-4] + return str.replace('/','_') + +def slackware_versions_strip(): + for i in slackware_versions(): + yield i[len(SLACKWARE_DIR_PATH + "/"):] + 
+def slackware_versions_rss(): + for i in slackware_versions(): + yield slackware_versions_format(i) + +def process_changelog_rss(pathname): + if os.path.basename(pathname) == "ChangeLog.txt": + print "%f: proccessing %s" % (time.time(), pathname) + # XXX REPLACE ME!! + cmd = "/home/vbatts/opt/bin/ruby /home/vbatts/bin/gen_changlog_rss.rb %s" % pathname + print cmd + print os.system(cmd) + else: + print '[WARN] "%s" is not a ChangeLog.txt file' % pathname + +def db_setup(name = DEFAULT_DB): + try: + return anydbm.open(name, 'c') + except: + return None + +def db_teardown(db): + try: + return db.close() + except: + return None + +def db_add_ts(db, key, val): + if type(val) == float: + db[key] = str(val) + if type(val) == datetime: + db[key] = str(unix_time(val)) + return db[key] + +def db_get_ts(db, key): + try: + return datetime.fromtimestamp(float(db[key])) + except KeyError: + return None + +def unix_time(dt): + return mktime(dt.timetuple())+1e-6*dt.microsecond + +def time_from_header(str): + return datetime.strptime(str, "%a, %d %b %Y %H:%M:%S %Z") + +def get_remote_header(url, header): + try: + req = urllib2.Request(url) + resp = urllib2.urlopen(req) + return resp.headers.getheader(header) + except: + return None + +def get_remote_time_str(url): + return get_remote_header(url,"last-modified") + +def get_remote_time(url): + time_str = get_remote_time_str(url) + if time_str: + return time_from_header(time_str) + else: + return None + +def get_local_time(path): + try: + time_flt = os.stat(path).st_mtime + return datetime.fromtimestamp(time_flt) + except: + return None + +def main(args): + try: + db = db_setup() + if db == None: + print "ERROR: could not setup database at %s" % DEFAULT_DB + return 1 + + for i in slackware_versions_strip(): + # i'm not going to worry about this file, right now + if i == 'slackware/ChangeLog.txt': + continue + + rss_file_name = os.path.join(RSS_DIR_PATH, + slackware_versions_format(i) + ".rss") + rss_ts = 
get_local_time(rss_file_name) + curr_ts = get_local_time(os.path.join(SLACKWARE_DIR_PATH, i)) + prev_ts = db_get_ts( db, "local_" + i) + + # Go no further for this file + if curr_ts == prev_ts and os.path.exists(rss_file_name) and rss_ts > prev_ts: + print '[INFO] Local time of "%s" is same as the database has' % i + continue + + db_add_ts( db, "local_" + i, curr_ts) + + remote_ts = get_remote_time(DEFAULT_URL + i) + print '[INFO] inserting remote_%s: %s' % (i,remote_ts) + db_add_ts( db, "remote_" + i, remote_ts) + + if prev_ts == None or (remote_ts - prev_ts) == timedelta(hours=7): + print '[INFO] local and remote ChangeLog times match' + if rss_ts == None: + print '[INFO] RSS file (%s) does not exist' % (rss_ts) + print '[INFO] Processing "%s"' % rss_file_name + process_changelog_rss(os.path.join(SLACKWARE_DIR_PATH, i)) + elif prev_ts == None or rss_ts < prev_ts: + print '[INFO] RSS file (%s) is older than the ChangeLog (%s)' % (rss_ts, prev_ts) + print '[INFO] Processing "%s"' % rss_file_name + process_changelog_rss(os.path.join(SLACKWARE_DIR_PATH, i)) + else: + print '[INFO] RSS seems current' + finally: + try: + os.wait() + except: + pass + db_teardown(db) + +if __name__ == "__main__": sys.exit(main(sys.argv[1:])) + diff --git a/bin/changelog_pyinotify.py b/bin/changelog_pyinotify.py new file mode 100755 index 0000000..b1af524 --- /dev/null +++ b/bin/changelog_pyinotify.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python + +import os +import sys +import glob +import time + +sys.path.insert(0, "/home/vbatts/opt/lib/python2.5/site-packages") +sys.path.insert(0, "/home/vbatts/opt/lib/python2.5") +import pyinotify + +dir_path = "/mirrors/ftp.slackware.com/pub/slackware" + +def process_changelog_rss(event): + if os.path.basename(event.pathname) == "ChangeLog.txt": + print "%f: proccessing %s" % (time.time(), event) + os.system("/home/vbatts/opt/bin/ruby /home/vbatts/bin/gen_changlog_rss.rb %s" % event.pathname) + +def main(args): + wm = pyinotify.WatchManager() + + notifier 
= pyinotify.Notifier(wm) + + for dir in glob.glob(dir_path + "/*/"): + if os.path.exists(dir + "ChangeLog.txt"): + print "%f: Adding watch for %s" % (time.time(), dir) + wm.add_watch(dir, pyinotify.IN_MOVED_TO, rec=False, proc_fun=process_changelog_rss) + + for dir in glob.glob(dir_path + "/*/patches/"): + print "%f: Adding watch for %s" % (time.time(), dir) + wm.add_watch(dir, pyinotify.IN_MOVED_TO, rec=False, proc_fun=process_changelog_rss) + + #wm.add_watch("/home/vbatts/", pyinotify.IN_MOVED_TO, rec=False, proc_fun=process_changelog_rss) + + notifier.loop() + + +if __name__ == "__main__": main(sys.argv[1:]) + diff --git a/bin/changelog_slackware-rss.rb b/bin/changelog_slackware-rss.rb new file mode 100755 index 0000000..3a63672 --- /dev/null +++ b/bin/changelog_slackware-rss.rb @@ -0,0 +1,155 @@ +#!/usr/bin/env ruby +# Sun Jan 23 11:30:53 PST 2011 +# Created by vbatts, vbatts@hashbangbash.com + +$PROGRAM_NAME = File.basename(__FILE__) + +require 'find' + +require 'rubygems' +require 'ruby-prof' +require 'slackware' +require 'slackware/changelog/rss' +require 'rb-inotify' + + +BASE_URL = "http://slackware.osuosl.org/" +MIRROR_BASE_DIR = "/mirrors/ftp.slackware.com/pub/slackware/" +FEEDS_BASE_DIR = "/home/vbatts/public_html/feeds/" +RE_REPO_NAME = Regexp.new(/slackware(\d{2})?-(\d+\.\d+|current)\/(patches)?\/?.*/) + +def generate_new_if_none + files = [] + + Find.find(MIRROR_BASE_DIR) {|file| + relative_name = file.sub(MIRROR_BASE_DIR, "") + if File.basename(file) == "ChangeLog.txt" + if not(relative_name.include?("zipslack")) + files << relative_name + Find.prune + end + end + # putting this check *after* the one above, + # lets us get the patches directories too + # while still getting a bit of speed (1.5s) + if relative_name.split("/").count > 2 + Find.prune + end + } + puts "%f: watching %d changelogs" % [Time.now.to_f, files.count] + files.each {|file| + m = RE_REPO_NAME.match file + if m[3].nil? 
+ file_name = "%sslackware%s-%s_ChangeLog.rss" % [FEEDS_BASE_DIR, m[1], m[2]] + else + file_name = "%sslackware%s-%s_%s_ChangeLog.rss" % [FEEDS_BASE_DIR, m[1], m[2], m[3]] + end + unless File.exist?(file_name) + c_file = MIRROR_BASE_DIR + file + changelog = Slackware::ChangeLog.new(c_file, :version => m[2]) + changelog.opts[:arch] = m[1] unless m[1].nil? + if m[3].nil? + changelog.opts[:url] = "%sslackware%s-%s/ChangeLog.txt" % [BASE_URL, m[1], m[2]] + feed = File.open( "%sslackware%s-%s_ChangeLog.rss" % [FEEDS_BASE_DIR, m[1], m[2]], "w+") + else + changelog.opts[:url] = "%sslackware%s-%s/%s/ChangeLog.txt" % [BASE_URL, m[1], m[2], m[3]] + feed = File.open( "%sslackware%s-%s_%s_ChangeLog.rss" % [FEEDS_BASE_DIR, m[1], m[2], m[3]], "w+") + end + changelog.parse + puts "%f: Making a first feed: %s" % [Time.now.to_f, feed.path] + feed << changelog.to_rss + feed.close + changelog = nil + end + } +end + +def run_notifier + n = INotify::Notifier.new + dirs = Dir.glob(MIRROR_BASE_DIR + "*") + dirs.concat(Dir.glob(MIRROR_BASE_DIR + "*/patches/")) + dirs.each {|dir| + next unless File.exist?(File.join(dir, "ChangeLog.txt")) + puts "%f: working with %s" % [Time.now.to_f, dir] + n.watch(dir, :moved_to) {|mfile| + file_name = mfile.absolute_name + if File.basename(file_name) == "ChangeLog.txt" + puts "%f: looking into %s" % [Time.now.to_f, file_name] + match_data = RE_REPO_NAME.match(file_name) + + unless match_data.nil? + changelog = Slackware::ChangeLog.new(file_name, :version => match_data[2]) + changelog.opts[:arch] = match_data[1] unless match_data[1].nil? + + if match_data[3].nil? 
+ changelog.opts[:url] = "%sslackware%s-%s/ChangeLog.txt" % [ + BASE_URL, + match_data[1], + match_data[2] + ] + feed = File.open( "%sslackware%s-%s_ChangeLog.rss" % [ + FEEDS_BASE_DIR, + match_data[1], + match_data[2] + ], "w+") + else + changelog.opts[:url] = "%sslackware%s-%s/%s/ChangeLog.txt" % [ + BASE_URL, + match_data[1], + match_data[2], + match_data[3] + ] + feed = File.open( "%sslackware%s-%s_%s_ChangeLog.rss" % [ + FEEDS_BASE_DIR, + match_data[1], + match_data[2], + match_data[3] + ], "w+") + end + begin + changelog.parse + rescue StandardError => ex + puts "%f: %s" % [Time.now.to_f, ex.message] + puts "%f: %s" % [Time.now.to_f, file_name] + next + end + + puts "%f: parsed %s to %s" % [Time.now.to_f, file_name, feed.path] + + feed << changelog.to_rss + feed.close + changelog = nil + end + end + } + } + begin + n.run + rescue Interrupt + end +end + +## Main + +#generate_new_if_none() +begin + RubyProf.start + run_notifier() +ensure + result = RubyProf.stop + + RubyProf.measure_mode = RubyProf::PROCESS_TIME + RubyProf.measure_mode = RubyProf::WALL_TIME + RubyProf.measure_mode = RubyProf::CPU_TIME + #RubyProf.measure_mode = RubyProf::ALLOCATIONS + #RubyProf.measure_mode = RubyProf::MEMORY + #RubyProf.measure_mode = RubyProf::GC_RUNS + #RubyProf.measure_mode = RubyProf::GC_TIME + + output_file_name = File.join(ENV["HOME"],"%s-%s%s" % [Time.now.to_i.to_s,File.basename(__FILE__),".log"]) + output_file = File.open(output_file_name, "w+") + printer = RubyProf::FlatPrinter.new(result) + printer.print(output_file,0) + puts "%f: %s written" % [Time.now.to_f, output_file_name] + output_file.close +end diff --git a/bin/gen_changlog_rss.rb b/bin/gen_changlog_rss.rb new file mode 100755 index 0000000..c5ecace --- /dev/null +++ b/bin/gen_changlog_rss.rb @@ -0,0 +1,68 @@ +#!/home/vbatts/opt/bin/ruby + +require 'logger' + +$log = Logger.new(STDERR) +$log.level = Logger::DEBUG + +# put this in a loader function, because the +# rss library is SOO SLOW to load. 
we don't want to load it, +# if the script is going to fail early. +def load_libs() + require 'rubygems' + require 'slackware' + require 'slackware/changelog/rss' + require 'rb-inotify' +end + + +BASE_URL = "http://slackware.osuosl.org/" +FEEDS_BASE_DIR = "/home/vbatts/public_html/feeds/" +RE_REPO_NAME = Regexp.new(/slackware(\d{2})?-(\d+\.\d+|current)\/(patches)?\/?.*/) + +def gen_file(file) + m = RE_REPO_NAME.match file + if m[3].nil? + file_name = "%sslackware%s-%s_ChangeLog.rss" % [FEEDS_BASE_DIR, m[1], m[2]] + else + file_name = "%sslackware%s-%s_%s_ChangeLog.rss" % [FEEDS_BASE_DIR, m[1], m[2], m[3]] + end + + if File.exist?(file_name) + if File.mtime(file) < File.mtime(file_name) + printf("%f: INFO: %s is newer than %s\n", Time.now, file, file_name) + end + end + + changelog = Slackware::ChangeLog.new(file) #, :version => m[2]) + opts = Hash.new + opts[:arch] = m[1] unless m[1].nil? + if m[3].nil? + opts[:url] = "%sslackware%s-%s/ChangeLog.txt" % [BASE_URL, m[1], m[2]] + feed = File.open( "%sslackware%s-%s_ChangeLog.rss" % [FEEDS_BASE_DIR, m[1], m[2]], "w+") + else + opts[:url] = "%sslackware%s-%s/%s/ChangeLog.txt" % [BASE_URL, m[1], m[2], m[3]] + feed = File.open( "%sslackware%s-%s_%s_ChangeLog.rss" % [FEEDS_BASE_DIR, m[1], m[2], m[3]], "w+") + end + changelog.parse + printf("%f: INFO: generating feed: %s\n", Time.now.to_f, feed.path) + feed << changelog.to_rss(opts) + feed.close + changelog = nil +end + +if ARGV.count == 0 + $log.error("#{Time.now}: ERROR: ChangeLog.txt files must be passed\n") + exit(2) +else + load_libs() + for file in ARGV + if File.exist?(file) + gen_file(file) + else + $log.warn("#{Time.now}: WARN: #{file} does not exist\n") + end + end +end + +# vim: set sts=2 sw=2 et ai: diff --git a/crontab b/crontab new file mode 100644 index 0000000..4d951a8 --- /dev/null +++ b/crontab @@ -0,0 +1,2 @@ +#0 2 * * * ~/opt/bin/ruby ~/bin/changelog_alphageek.rb > /dev/null || echo "$(date): failed aphageek" | mail -s "[slackagg] alphageek's changelog 
failed $(date +%D)" vbatts@hashbangbash.com +0 */2 * * * python ~/bin/changelog_http_poll.py >/dev/null || echo "$(date): failed to poll changelogs" | mail -s "[slackrss] changelog_http_poll failed $(date +%D)" vbatts@hashbangbash.com