Switch the build logs archiver to a more performant query

Fixes #459
Joseph Schorr 2015-09-09 13:59:45 -04:00
parent e81a50aa9a
commit 3ee4147117
3 changed files with 84 additions and 32 deletions

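The hunk below swaps out a query that asked the database to randomly order every unarchived build (an ORDER BY random() forces the engine to sort the full candidate set on each pass) for a model.build.get_archivable_build() helper defined in one of the other changed files, which this page does not show. A minimal sketch of the likely idea, assuming a peewee RepositoryBuild model with a logs_archived flag (names assumed here, not taken from this hunk):

import random

def get_archivable_build():
  """ Sketch only: cap the candidate set first, then choose randomly in
      Python, so the database never has to random-sort the whole table. """
  candidates = list(RepositoryBuild
                    .select()
                    .where(RepositoryBuild.logs_archived == False)
                    .limit(50))  # assumed cap; any small constant works
  if not candidates:
    return None  # matches the None contract the worker checks below
  return random.choice(candidates)

Fetching a bounded batch and picking in application code trades perfectly uniform randomness for a cheap, index-friendly query, a reasonable trade for an idempotent janitor task that only needs to avoid herding on a single row.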

@@ -24,34 +24,34 @@ class ArchiveBuildLogsWorker(Worker):
   def _archive_redis_buildlogs(self):
     """ Archive a single build, choosing a candidate at random. This process must be idempotent to
         avoid needing two-phase commit. """
-    try:
-      # Get a random build to archive
-      to_archive = model.build.archivable_buildlogs_query().order_by(db_random_func()).get()
-      logger.debug('Archiving: %s', to_archive.uuid)
-
-      length, entries = build_logs.get_log_entries(to_archive.uuid, 0)
-      to_encode = {
-        'start': 0,
-        'total': length,
-        'logs': entries,
-      }
-
-      with SpooledTemporaryFile(MEMORY_TEMPFILE_SIZE) as tempfile:
-        with GzipFile('testarchive', fileobj=tempfile) as zipstream:
-          for chunk in StreamingJSONEncoder().iterencode(to_encode):
-            zipstream.write(chunk)
-
-        tempfile.seek(0)
-        log_archive.store_file(tempfile, JSON_MIMETYPE, content_encoding='gzip',
-                               file_id=to_archive.uuid)
-
-      to_archive.logs_archived = True
-      to_archive.save()
-
-      build_logs.expire_log_entries(to_archive.uuid)
-    except RepositoryBuild.DoesNotExist:
-      logger.debug('No more builds to archive')
+    # Get a random build to archive
+    to_archive = model.build.get_archivable_build()
+    if to_archive is None:
+      logger.debug('No more builds to archive')
+      return
+
+    logger.debug('Archiving: %s', to_archive.uuid)
+
+    length, entries = build_logs.get_log_entries(to_archive.uuid, 0)
+    to_encode = {
+      'start': 0,
+      'total': length,
+      'logs': entries,
+    }
+
+    with SpooledTemporaryFile(MEMORY_TEMPFILE_SIZE) as tempfile:
+      with GzipFile('testarchive', fileobj=tempfile) as zipstream:
+        for chunk in StreamingJSONEncoder().iterencode(to_encode):
+          zipstream.write(chunk)
+
+      tempfile.seek(0)
+      log_archive.store_file(tempfile, JSON_MIMETYPE, content_encoding='gzip',
+                             file_id=to_archive.uuid)
+
+    to_archive.logs_archived = True
+    to_archive.save()
+
+    build_logs.expire_log_entries(to_archive.uuid)
 
 if __name__ == "__main__":
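
For reference, the archiving path this diff keeps streams the JSON encoding chunk-by-chunk into a gzip stream backed by a spooled temporary file, so small archives stay in memory and only large ones spill to disk. A standalone sketch of that pattern, substituting the stdlib JSONEncoder for the worker's StreamingJSONEncoder and assuming a 64 KiB spool threshold (the real MEMORY_TEMPFILE_SIZE is defined elsewhere in the file):

from gzip import GzipFile
from json import JSONEncoder
from tempfile import SpooledTemporaryFile

MEMORY_TEMPFILE_SIZE = 64 * 1024  # assumed threshold, not taken from the diff

def compress_json(payload):
  # Encode incrementally and gzip on the fly; SpooledTemporaryFile keeps the
  # data in memory until it grows past the threshold, then spills to disk.
  tempfile = SpooledTemporaryFile(MEMORY_TEMPFILE_SIZE)
  with GzipFile('archive', mode='wb', fileobj=tempfile) as zipstream:
    for chunk in JSONEncoder().iterencode(payload):
      zipstream.write(chunk.encode('utf-8'))  # the 2015 worker ran on Python 2; bytes are needed on 3
  tempfile.seek(0)  # rewind so the caller can hand the stream to storage
  return tempfile

The worker then uploads the rewound stream with content_encoding='gzip' and only flips logs_archived after the upload succeeds, which is what keeps the step idempotent: a crash before the save simply means the same build is picked up again on a later pass.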