Switch to a unified worker system
- Handles logging
- Handles reporting to Sentry
- Removes old code around serving a web endpoint (unused now)
parent dbd9a32c85
commit ac0cca2d90

7 changed files with 264 additions and 268 deletions
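The diff below moves this file onto the shared Worker base class imported from workers.worker, which, per the commit message, centralizes logging and reporting to Sentry. That base class is not part of this hunk, so the sketch that follows is only an illustration of the add_operation()/start() contract the new code relies on; the internals, attribute names, parameter names, and scheduling loop shown here are assumptions for illustration, not the actual workers/worker.py implementation.

    # Hypothetical sketch of the Worker base class used by the diff below.
    # NOT the real workers/worker.py; it only illustrates the
    # add_operation()/start() contract implied by the change.
    import logging
    import time

    logger = logging.getLogger(__name__)


    class Worker(object):
      """ Base class for background workers: runs registered operations on a
          fixed interval; the real class also wires up logging and Sentry
          reporting per the commit message. """
      def __init__(self):
        self._operations = []  # list of (callable, period in seconds)

      def add_operation(self, operation_func, operation_sec):
        # Register a callable to be invoked every operation_sec seconds.
        self._operations.append((operation_func, operation_sec))

      def start(self):
        # Simplified single-threaded polling loop; the real base class
        # presumably uses a proper scheduler and reports failures to Sentry.
        while True:
          for operation_func, operation_sec in self._operations:
            try:
              operation_func()
            except Exception:
              logger.exception('Operation raised an unhandled exception')
            time.sleep(operation_sec)

In the diff, ArchiveBuildLogsWorker registers _archive_redis_buildlogs with add_operation() at POLL_PERIOD_SECONDS, and the __main__ block constructs the worker and calls start() instead of starting the old BlockingScheduler.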
@@ -1,6 +1,5 @@
 import logging
 
-from apscheduler.schedulers.blocking import BlockingScheduler
 from peewee import fn
 from tempfile import SpooledTemporaryFile
 from gzip import GzipFile
@@ -10,47 +9,51 @@ from data.archivedlogs import JSON_MIMETYPE
 from data.database import RepositoryBuild, db_random_func
 from app import build_logs, log_archive
 from util.streamingjsonencoder import StreamingJSONEncoder
+from workers.worker import Worker
 
 POLL_PERIOD_SECONDS = 30
 MEMORY_TEMPFILE_SIZE = 64 * 1024  # Large enough to handle approximately 99% of builds in memory
 
 logger = logging.getLogger(__name__)
-sched = BlockingScheduler()
 
-@sched.scheduled_job(trigger='interval', seconds=30)
-def archive_redis_buildlogs():
-  """ Archive a single build, choosing a candidate at random. This process must be idempotent to
-      avoid needing two-phase commit. """
-  try:
-    # Get a random build to archive
-    to_archive = model.build.archivable_buildlogs_query().order_by(db_random_func()).get()
-    logger.debug('Archiving: %s', to_archive.uuid)
+class ArchiveBuildLogsWorker(Worker):
+  def __init__(self):
+    super(ArchiveBuildLogsWorker, self).__init__()
+    self.add_operation(self._archive_redis_buildlogs, POLL_PERIOD_SECONDS)
 
-    length, entries = build_logs.get_log_entries(to_archive.uuid, 0)
-    to_encode = {
-      'start': 0,
-      'total': length,
-      'logs': entries,
-    }
+  def _archive_redis_buildlogs(self):
+    """ Archive a single build, choosing a candidate at random. This process must be idempotent to
+        avoid needing two-phase commit. """
+    try:
+      # Get a random build to archive
+      to_archive = model.build.archivable_buildlogs_query().order_by(db_random_func()).get()
+      logger.debug('Archiving: %s', to_archive.uuid)
 
-    with SpooledTemporaryFile(MEMORY_TEMPFILE_SIZE) as tempfile:
-      with GzipFile('testarchive', fileobj=tempfile) as zipstream:
-        for chunk in StreamingJSONEncoder().iterencode(to_encode):
-          zipstream.write(chunk)
+      length, entries = build_logs.get_log_entries(to_archive.uuid, 0)
+      to_encode = {
+        'start': 0,
+        'total': length,
+        'logs': entries,
+      }
 
-      tempfile.seek(0)
-      log_archive.store_file(tempfile, JSON_MIMETYPE, content_encoding='gzip',
-                             file_id=to_archive.uuid)
+      with SpooledTemporaryFile(MEMORY_TEMPFILE_SIZE) as tempfile:
+        with GzipFile('testarchive', fileobj=tempfile) as zipstream:
+          for chunk in StreamingJSONEncoder().iterencode(to_encode):
+            zipstream.write(chunk)
 
-    to_archive.logs_archived = True
-    to_archive.save()
+        tempfile.seek(0)
+        log_archive.store_file(tempfile, JSON_MIMETYPE, content_encoding='gzip',
+                               file_id=to_archive.uuid)
 
-    build_logs.expire_log_entries(to_archive.uuid)
+      to_archive.logs_archived = True
+      to_archive.save()
 
-  except RepositoryBuild.DoesNotExist:
-    logger.debug('No more builds to archive')
+      build_logs.expire_log_entries(to_archive.uuid)
 
+    except RepositoryBuild.DoesNotExist:
+      logger.debug('No more builds to archive')
 
 
 if __name__ == "__main__":
   logging.basicConfig(level=logging.DEBUG)
-  sched.start()
+  worker = ArchiveBuildLogsWorker()
+  worker.start()