Finish the build logs archiver; add cloud and local handlers for gzip-encoded archived content.

This commit is contained in:
Jake Moshenko 2014-09-11 15:33:10 -04:00
parent 2455c17f96
commit 8b3a3178b0
10 changed files with 82 additions and 18 deletions
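For orientation, here is a minimal, self-contained sketch (Python 2, stdlib plus cStringIO; all names illustrative, not the actual Quay classes) of the round trip this commit implements: log entries are gzip-compressed before being written to storage, and decompressed again when the archive is served back to a client.

import json
from gzip import GzipFile
from cStringIO import StringIO

# Example payload; the real archiver serializes the build's Redis log entries.
to_encode = {'start': 0, 'total': 2, 'logs': [{'message': 'Step 1'}, {'message': 'Step 2'}]}

# Archive side: gzip the JSON into a buffer (the stored object stays compressed).
compressed = StringIO()
with GzipFile('archive', 'wb', fileobj=compressed) as zipstream:
    zipstream.write(json.dumps(to_encode))

# Handler side: decompress the stored stream before returning it to the client.
with GzipFile(fileobj=StringIO(compressed.getvalue())) as unzipped:
    assert json.loads(unzipped.read())['total'] == 2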

View file

@@ -1,5 +1,30 @@
import logging
from gzip import GzipFile
from flask import send_file, abort
from cStringIO import StringIO
from data.userfiles import DelegateUserfiles, UserfilesHandlers
JSON_MIMETYPE = 'application/json'
logger = logging.getLogger(__name__)
class LogArchiveHandlers(UserfilesHandlers):
def get(self, file_id):
path = self._files.get_file_id_path(file_id)
try:
with self._storage.stream_read_file(self._locations, path) as gzip_stream:
with GzipFile(fileobj=gzip_stream) as unzipped:
unzipped_buffer = StringIO(unzipped.read())
return send_file(unzipped_buffer, mimetype=JSON_MIMETYPE)
except IOError:
abort(404)
class LogArchive(object):
def __init__(self, app=None, distributed_storage=None):
self.app = app
@@ -17,7 +42,7 @@ class LogArchive(object):
log_archive = DelegateUserfiles(app, distributed_storage, location, path, handler_name)
app.add_url_rule('/logarchive/<file_id>',
view_func=UserfilesHandlers.as_view(handler_name,
view_func=LogArchiveHandlers.as_view(handler_name,
distributed_storage=distributed_storage,
location=location,
files=log_archive))
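The registration above relies on Flask's pluggable views: as_view() forwards its keyword arguments to the view class constructor, which is how LogArchiveHandlers receives its distributed_storage, location, and files objects. A small sketch of the same pattern with hypothetical names:

from flask import Flask
from flask.views import MethodView

app = Flask(__name__)

class EchoHandler(MethodView):
    # Constructor kwargs arrive via as_view(), mirroring how LogArchiveHandlers
    # is constructed with its storage and file-delegate objects.
    def __init__(self, greeting):
        self.greeting = greeting

    def get(self, name):
        return '%s, %s' % (self.greeting, name)

app.add_url_rule('/echo/<name>', view_func=EchoHandler.as_view('echo', greeting='Hello'))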

View file

@@ -51,6 +51,13 @@ class RedisBuildLogs(object):
except redis.ConnectionError:
raise BuildStatusRetrievalError('Cannot retrieve build logs')
def delete_log_entries(self, build_id):
"""
Deletes the logs key for this build completely.
"""
self._redis.delete(self._logs_key(build_id))
@staticmethod
def _status_key(build_id):
return 'builds/%s/status' % build_id
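Assuming the logs live under a key shaped like the status key above (builds/<build_id>/logs; _logs_key itself is not shown in this hunk, so that layout is an assumption), the delete step amounts to dropping one Redis key once the entries have been archived. A rough redis-py sketch:

import json
import redis

client = redis.StrictRedis()
logs_key = 'builds/%s/logs' % 'some-build-uuid'  # hypothetical build id and key layout

# Entries are appended as JSON blobs while the build runs...
client.rpush(logs_key, json.dumps({'message': 'Step 1 : FROM ubuntu'}))

# ...and once they have been archived to storage, the key is deleted outright.
client.delete(logs_key)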

View file

@@ -81,12 +81,13 @@ class DelegateUserfiles(object):
return (url, file_id)
def store_file(self, file_like_obj, content_type, file_id=None):
def store_file(self, file_like_obj, content_type, content_encoding=None, file_id=None):
if file_id is None:
file_id = str(uuid4())
path = self.get_file_id_path(file_id)
self._storage.stream_write(self._locations, path, file_like_obj, content_type)
self._storage.stream_write(self._locations, path, file_like_obj, content_type,
content_encoding)
return file_id
def get_file_url(self, file_id, expires_in=300, requires_cors=False):

View file

@@ -1,9 +1,9 @@
import logging
import json
from flask import request
from flask import request, redirect
from app import app, userfiles as user_files, build_logs
from app import app, userfiles as user_files, build_logs, log_archive
from endpoints.api import (RepositoryParamResource, parse_args, query_param, nickname, resource,
require_repo_read, require_repo_write, validate_json_request,
ApiResource, internal_only, format_date, api, Unauthorized, NotFound)
@@ -215,6 +215,10 @@ class RepositoryBuildLogs(RepositoryParamResource):
build = model.get_repository_build(namespace, repository, build_uuid)
# If the logs have been archived, just redirect to the completed archive
if build.logs_archived:
return redirect(log_archive.get_file_url(build.uuid))
start = int(request.args.get('start', 0))
try:

View file

@@ -75,7 +75,7 @@ class BaseStorage(StoragePaths):
def stream_read_file(self, path):
raise NotImplementedError
def stream_write(self, path, fp, content_type=None):
def stream_write(self, path, fp, content_type=None, content_encoding=None):
raise NotImplementedError
def list_directory(self, path=None):

View file

@@ -128,7 +128,7 @@ class _CloudStorage(BaseStorage):
raise IOError('No such key: \'{0}\''.format(path))
return StreamReadKeyAsFile(key)
def stream_write(self, path, fp, content_type=None):
def stream_write(self, path, fp, content_type=None, content_encoding=None):
# Minimum size of upload part size on S3 is 5MB
self._initialize_cloud_conn()
buffer_size = 5 * 1024 * 1024
@@ -140,6 +140,9 @@ class _CloudStorage(BaseStorage):
if content_type is not None:
metadata['Content-Type'] = content_type
if content_encoding is not None:
metadata['Content-Encoding'] = content_encoding
mp = self._cloud_bucket.initiate_multipart_upload(path, metadata=metadata,
**self._upload_params)
num_part = 1
@@ -224,7 +227,7 @@ class GoogleCloudStorage(_CloudStorage):
connect_kwargs, upload_params, storage_path,
access_key, secret_key, bucket_name)
def stream_write(self, path, fp, content_type=None):
def stream_write(self, path, fp, content_type=None, content_encoding=None):
# Minimum size of upload part size on S3 is 5MB
self._initialize_cloud_conn()
path = self._init_path(path)
@@ -233,6 +236,9 @@ class GoogleCloudStorage(_CloudStorage):
if content_type is not None:
key.set_metadata('Content-Type', content_type)
if content_encoding is not None:
key.set_metadata('Content-Encoding', content_encoding)
key.set_contents_from_stream(fp)
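Content-Encoding matters here because the build-logs endpoint redirects clients to the archive's URL; when that URL points directly at S3 or GCS, the stored header is returned on GET and HTTP clients that honor it decompress the gzip payload transparently. A hedged boto sketch of the same idea (bucket and key names are placeholders; credentials are assumed to be available in the environment):

import json
from gzip import GzipFile
from cStringIO import StringIO

import boto

# Compress a small JSON document the same way the archiver does.
buf = StringIO()
with GzipFile('archive', 'wb', fileobj=buf) as zipstream:
    zipstream.write(json.dumps({'logs': []}))

conn = boto.connect_s3()
bucket = conn.get_bucket('example-bucket')
key = bucket.new_key('logarchive/some-build-uuid')
key.set_metadata('Content-Type', 'application/json')
key.set_metadata('Content-Encoding', 'gzip')
key.set_contents_from_string(buf.getvalue())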

View file

@@ -14,7 +14,7 @@ class FakeStorage(BaseStorage):
def stream_read(self, path):
yield ''
def stream_write(self, path, fp, content_type=None):
def stream_write(self, path, fp, content_type=None, content_encoding=None):
pass
def remove(self, path):

View file

@@ -43,7 +43,7 @@ class LocalStorage(BaseStorage):
path = self._init_path(path)
return io.open(path, mode='rb')
def stream_write(self, path, fp, content_type=None):
def stream_write(self, path, fp, content_type=None, content_encoding=None):
# Size is mandatory
path = self._init_path(path, create=True)
with open(path, mode='wb') as f:

View file

@@ -198,3 +198,11 @@ class TestBuildLogs(RedisBuildLogs):
return None
else:
return super(TestBuildLogs, self).get_status(build_id)
def delete_log_entries(self, build_id):
if build_id == self.test_build_id:
return
if not self.allow_delegate:
return None
else:
return super(TestBuildLogs, self).delete_log_entries(build_id)

View file

@@ -3,10 +3,12 @@ import logging
from apscheduler.schedulers.blocking import BlockingScheduler
from peewee import fn
from tempfile import SpooledTemporaryFile
from gzip import GzipFile
from data import model
from data.database import configure, RepositoryBuild
from app import app, build_logs, log_archive
from data.archivedlogs import JSON_MIMETYPE
from data.database import RepositoryBuild
from app import build_logs, log_archive
from util.streamingjsonencoder import StreamingJSONEncoder
POLL_PERIOD_SECONDS = 30
@@ -14,7 +16,7 @@ POLL_PERIOD_SECONDS = 30
logger = logging.getLogger(__name__)
sched = BlockingScheduler()
@sched.scheduled_job(trigger='interval', seconds=5)
@sched.scheduled_job(trigger='interval', seconds=1)
def archive_redis_buildlogs():
""" Archive a single build, choosing a candidate at random. This process must be idempotent to
avoid needing two-phase commit. """
@@ -30,8 +32,19 @@ def archive_redis_buildlogs():
'logs': entries,
}
with SpooledTemporaryFile() as tempfile:
with GzipFile('testarchive', fileobj=tempfile) as zipstream:
for chunk in StreamingJSONEncoder().iterencode(to_encode):
zipstream.write(chunk)
tempfile.seek(0)
log_archive.store_file(tempfile, JSON_MIMETYPE, content_encoding='gzip',
file_id=to_archive.uuid)
to_archive.logs_archived = True
to_archive.save()
build_logs.delete_log_entries(to_archive.uuid)
except RepositoryBuild.DoesNotExist:
logger.debug('No more builds to archive')
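For reference, a stdlib-only sketch of the archive step above, with json.JSONEncoder.iterencode standing in for Quay's StreamingJSONEncoder: JSON chunks are streamed through gzip into a spooled temporary file, which is then rewound for upload via store_file.

import json
from gzip import GzipFile
from tempfile import SpooledTemporaryFile

to_encode = {'start': 0, 'total': 1, 'logs': [{'message': 'Step 1 : FROM ubuntu'}]}

with SpooledTemporaryFile() as tempfile:
    with GzipFile('archive', 'wb', fileobj=tempfile) as zipstream:
        # Encode incrementally so large logs never sit uncompressed in memory.
        for chunk in json.JSONEncoder().iterencode(to_encode):
            zipstream.write(chunk)
    tempfile.seek(0)
    # tempfile now holds gzip-compressed JSON, ready for
    # store_file(tempfile, JSON_MIMETYPE, content_encoding='gzip', ...).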