import logging

from apscheduler.schedulers.blocking import BlockingScheduler
from peewee import fn
from tempfile import SpooledTemporaryFile
from gzip import GzipFile

from data import model
from data.archivedlogs import JSON_MIMETYPE
from data.database import RepositoryBuild, db_random_func
from app import build_logs, log_archive
from util.streamingjsonencoder import StreamingJSONEncoder

# How often the archiving job fires, in seconds. Referenced by the scheduler
# decorator below so the constant and the actual schedule cannot diverge.
POLL_PERIOD_SECONDS = 30

# Spool threshold for the temp file: large enough to handle approximately 99%
# of builds entirely in memory before spilling to disk.
MEMORY_TEMPFILE_SIZE = 64 * 1024

logger = logging.getLogger(__name__)
sched = BlockingScheduler()


@sched.scheduled_job(trigger='interval', seconds=POLL_PERIOD_SECONDS)
def archive_redis_buildlogs():
  """ Archive a single build, choosing a candidate at random. This process must be idempotent to
      avoid needing two-phase commit: marking the row archived and expiring the Redis entries
      can safely be retried if a run dies partway through. """
  try:
    # Pick a random archivable build so concurrent workers are unlikely to
    # contend on the same row.
    to_archive = model.archivable_buildlogs_query().order_by(db_random_func()).get()

    logger.debug('Archiving: %s', to_archive.uuid)

    # Fetch every log entry for the build, starting at offset 0.
    length, entries = build_logs.get_log_entries(to_archive.uuid, 0)
    to_encode = {
      'start': 0,
      'total': length,
      'logs': entries,
    }

    # Stream-encode the JSON through gzip into a spooled temp file; only very
    # large builds ever touch the filesystem.
    with SpooledTemporaryFile(MEMORY_TEMPFILE_SIZE) as tempfile:
      with GzipFile('testarchive', fileobj=tempfile) as zipstream:
        for chunk in StreamingJSONEncoder().iterencode(to_encode):
          zipstream.write(chunk)

      tempfile.seek(0)
      log_archive.store_file(tempfile, JSON_MIMETYPE, content_encoding='gzip',
                             file_id=to_archive.uuid)

    # Record success before expiring the Redis entries; if we crash between the
    # two steps, the next run simply skips the already-archived build.
    to_archive.logs_archived = True
    to_archive.save()

    build_logs.expire_log_entries(to_archive.uuid)
  except RepositoryBuild.DoesNotExist:
    # Nothing left to archive this cycle; the next interval will poll again.
    logger.debug('No more builds to archive')


if __name__ == "__main__":
  logging.basicConfig(level=logging.DEBUG)
  sched.start()