From ef658224103288c1ca9d7583569cde090eced388 Mon Sep 17 00:00:00 2001 From: Jimmy Zelinskie Date: Fri, 15 Apr 2016 16:38:41 -0400 Subject: [PATCH] logrotateworker: perf optimizations This removes our needless transaction, only calculates the cutoff date once, removes the logs generator, and uses a tested optimal MIN_LOGS_PER_ROTATION. --- workers/logrotateworker.py | 41 +++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/workers/logrotateworker.py b/workers/logrotateworker.py index b5028176f..8b5fd2ef7 100644 --- a/workers/logrotateworker.py +++ b/workers/logrotateworker.py @@ -9,7 +9,6 @@ from tempfile import SpooledTemporaryFile import features from app import app, storage from data.database import UseThenDisconnect -from data.model import db_transaction from data.model.log import (get_stale_logs, get_stale_logs_start_id, get_stale_logs_cutoff_id, delete_stale_logs) from data.userfiles import DelegateUserfiles @@ -22,7 +21,7 @@ logger = logging.getLogger(__name__) JSON_MIMETYPE = 'application/json' STALE_AFTER = timedelta(days=30) MIN_LOGS_PER_ROTATION = 10000 -MEMORY_TEMPFILE_SIZE = 64 * 1024 +MEMORY_TEMPFILE_SIZE = 12 * 1024 * 1024 WORKER_FREQUENCY = app.config.get('ACTION_LOG_ROTATION_FREQUENCY', 3600 * 6) SAVE_PATH = app.config.get('ACTION_LOG_ARCHIVE_PATH') @@ -39,6 +38,13 @@ class LogRotateWorker(Worker): log_archive = DelegateUserfiles(app, storage, SAVE_LOCATION, SAVE_PATH) + with UseThenDisconnect(app.config): + cutoff_date = datetime.now() - STALE_AFTER + cutoff_id = get_stale_logs_cutoff_id(cutoff_date) + if cutoff_id is None: + logger.warning('Failed to find cutoff id') + return + while True: with GlobalLock('ACTION_LOG_ROTATION') as gl: if not gl: @@ -46,37 +52,36 @@ class LogRotateWorker(Worker): return with UseThenDisconnect(app.config): - with db_transaction(): - cutoff_date = datetime.now() - STALE_AFTER - start_id = get_stale_logs_start_id() - cutoff_id = get_stale_logs_cutoff_id(cutoff_date) + start_id = get_stale_logs_start_id() - if start_id is None or cutoff_id is None: - logger.warning('No logs to be archived.') - return + if start_id is None: + logger.warning('Failed to find start id') + return - logger.debug('Found starting ID %s and cutoff ID %s', start_id, cutoff_id) + logger.debug('Found starting ID %s and cutoff ID %s', start_id, cutoff_id) - approx_count = cutoff_id - start_id - if approx_count < MIN_LOGS_PER_ROTATION: - logger.debug('Not enough stale logs to warrant rotation (approx %d)', approx_count) - return + approx_count = cutoff_id - start_id + if approx_count < MIN_LOGS_PER_ROTATION: + logger.debug('Not enough stale logs to warrant rotation (approx %d)', approx_count) + return - end_id = start_id + MIN_LOGS_PER_ROTATION - logs_generator = (log_dict(log) for log in get_stale_logs(start_id, end_id)) + end_id = start_id + MIN_LOGS_PER_ROTATION + logs = [log_dict(log) for log in get_stale_logs(start_id, end_id)] logger.debug('Archiving logs from IDs %s to %s', start_id, end_id) with SpooledTemporaryFile(MEMORY_TEMPFILE_SIZE) as tempfile: - with GzipFile('temp_action_log_rotate', fileobj=tempfile) as zipstream: - for chunk in StreamingJSONEncoder().iterencode(logs_generator): + with GzipFile('temp_action_log_rotate', fileobj=tempfile, compresslevel=1) as zipstream: + for chunk in StreamingJSONEncoder().iterencode(logs): zipstream.write(chunk) tempfile.seek(0) filename = '%d-%d.txt.gz' % (start_id, end_id) log_archive.store_file(tempfile, JSON_MIMETYPE, content_encoding='gzip', file_id=filename) + logger.debug('Finished archiving logs from IDs %s to %s', start_id, end_id) with UseThenDisconnect(app.config): + logger.debug('Deleting logs from IDs %s to %s', start_id, end_id) delete_stale_logs(start_id, end_id)