Optimize the logs archiving worker to not issue extremely long queries

Fixes https://jira.coreos.com/browse/QUAY-1371
This commit is contained in:
Joseph Schorr 2019-02-26 14:37:42 -05:00
parent 2bebea23f7
commit 91e9fe8050
3 changed files with 79 additions and 32 deletions

View file

@ -10,7 +10,7 @@ import features
from app import app, storage
from data.database import UseThenDisconnect, LogEntry, LogEntry2, LogEntry3
from data.model.log import (get_stale_logs, get_stale_logs_start_id,
get_stale_logs_cutoff_id, delete_stale_logs)
delete_stale_logs)
from data.userfiles import DelegateUserfiles
from util.locking import GlobalLock, LockNotAcquiredException
from util.log import logfile_path
@ -21,7 +21,7 @@ from workers.worker import Worker
logger = logging.getLogger(__name__)
JSON_MIMETYPE = 'application/json'
MIN_LOGS_PER_ROTATION = 10000
MIN_LOGS_PER_ROTATION = 5000
MEMORY_TEMPFILE_SIZE = 12 * 1024 * 1024
WORKER_FREQUENCY = app.config.get('ACTION_LOG_ROTATION_FREQUENCY', 60 * 60 * 12)
@ -44,22 +44,16 @@ class LogRotateWorker(Worker):
def _archive_logs_for_model(self, model):
logger.debug('Attempting to rotate log entries')
with UseThenDisconnect(app.config):
cutoff_date = datetime.now() - STALE_AFTER
cutoff_id = get_stale_logs_cutoff_id(cutoff_date, model)
if cutoff_id is None:
logger.warning('Failed to find cutoff id')
return
logs_archived = True
cutoff_date = datetime.now() - STALE_AFTER
while logs_archived:
try:
with GlobalLock('ACTION_LOG_ROTATION'):
logs_archived = self._perform_archiving(cutoff_id, model)
logs_archived = self._perform_archiving(model, cutoff_date)
except LockNotAcquiredException:
return
def _perform_archiving(self, cutoff_id, model):
def _perform_archiving(self, model, cutoff_date):
save_location = SAVE_LOCATION
if not save_location:
# Pick the *same* save location for all instances. This is a fallback if
@ -75,20 +69,21 @@ class LogRotateWorker(Worker):
logger.warning('Failed to find start id')
return False
logger.debug('Found starting ID %s and cutoff ID %s', start_id, cutoff_id)
logger.debug('Found starting ID %s', start_id)
lookup_end_id = start_id + MIN_LOGS_PER_ROTATION
logs = [log for log in get_stale_logs(start_id, lookup_end_id, model, cutoff_date)]
approx_count = cutoff_id - start_id
if approx_count < MIN_LOGS_PER_ROTATION:
logger.debug('Not enough stale logs to warrant rotation (approx %d)', approx_count)
return False
if not logs:
logger.debug('No further logs found')
return False
end_id = start_id + MIN_LOGS_PER_ROTATION
logs = [log_dict(log) for log in get_stale_logs(start_id, end_id, model)]
end_id = max([log.id for log in logs])
formatted_logs = [log_dict(log) for log in logs]
logger.debug('Archiving logs from IDs %s to %s', start_id, end_id)
with SpooledTemporaryFile(MEMORY_TEMPFILE_SIZE) as tempfile:
with GzipFile('temp_action_log_rotate', fileobj=tempfile, compresslevel=1) as zipstream:
for chunk in StreamingJSONEncoder().iterencode(logs):
for chunk in StreamingJSONEncoder().iterencode(formatted_logs):
zipstream.write(chunk)
tempfile.seek(0)