2015-10-09 19:41:56 +00:00
|
|
|
import logging
|
|
|
|
import json
|
|
|
|
import time
|
|
|
|
|
|
|
|
from datetime import timedelta, datetime
|
2016-04-08 17:04:55 +00:00
|
|
|
from gzip import GzipFile
|
|
|
|
from tempfile import SpooledTemporaryFile
|
2015-10-09 19:41:56 +00:00
|
|
|
|
|
|
|
import features
|
|
|
|
from app import app, storage
|
2019-01-03 18:50:43 +00:00
|
|
|
from data.database import UseThenDisconnect, LogEntry, LogEntry2, LogEntry3
|
2015-10-09 19:41:56 +00:00
|
|
|
from data.model.log import (get_stale_logs, get_stale_logs_start_id,
|
2016-02-09 20:20:52 +00:00
|
|
|
get_stale_logs_cutoff_id, delete_stale_logs)
|
2016-04-08 17:04:55 +00:00
|
|
|
from data.userfiles import DelegateUserfiles
|
2016-08-29 15:28:53 +00:00
|
|
|
from util.locking import GlobalLock, LockNotAcquiredException
|
2017-02-01 23:17:25 +00:00
|
|
|
from util.log import logfile_path
|
2016-04-08 17:04:55 +00:00
|
|
|
from util.streamingjsonencoder import StreamingJSONEncoder
|
2018-06-29 20:07:51 +00:00
|
|
|
from util.timedeltastring import convert_to_timedelta
|
2016-03-24 18:04:52 +00:00
|
|
|
from workers.worker import Worker
|
2015-10-09 19:41:56 +00:00
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)

# MIME type passed to the archive store for every uploaded log file.
JSON_MIMETYPE = 'application/json'

# Minimum approximate number of stale rows (cutoff ID minus start ID) needed
# before a rotation runs; also the width of the ID range archived per batch.
MIN_LOGS_PER_ROTATION = 10000

# Size limit handed to SpooledTemporaryFile when building the archive
# (12 * 1024 * 1024 = 12 MiB).
MEMORY_TEMPFILE_SIZE = 12 * 1024 * 1024

# Interval at which the archive operation is registered with the worker.
# Default is 60 * 60 * 12 -- presumably seconds (12 hours); confirm against
# Worker.add_operation.
WORKER_FREQUENCY = app.config.get('ACTION_LOG_ROTATION_FREQUENCY', 60 * 60 * 12)

# Age beyond which log entries are treated as stale; the configured string
# (default '30d') is converted to a timedelta.
STALE_AFTER = convert_to_timedelta(app.config.get('ACTION_LOG_ROTATION_THRESHOLD', '30d'))

# Path and storage location handed to DelegateUserfiles for the archive.
# Both are None when the config keys are absent; a falsy SAVE_LOCATION falls
# back to the first storage location at archive time.
SAVE_PATH = app.config.get('ACTION_LOG_ARCHIVE_PATH')
SAVE_LOCATION = app.config.get('ACTION_LOG_ARCHIVE_LOCATION')
|
|
|
|
|
2016-03-24 18:04:52 +00:00
|
|
|
class LogRotateWorker(Worker):
    """Worker that moves stale log rows out of the database and into storage."""

    def __init__(self):
        super(LogRotateWorker, self).__init__()
        # Register the rotation pass to run every WORKER_FREQUENCY.
        self.add_operation(self._archive_logs, WORKER_FREQUENCY)

    def _archive_logs(self):
        """Run one rotation pass over every log table, in order."""
        # TODO(LogMigrate): Remove the branch once we're back on a single table.
        for model in (LogEntry, LogEntry2, LogEntry3):
            self._archive_logs_for_model(model)

    def _archive_logs_for_model(self, model):
        """Archive batches of stale rows from ``model`` until none remain.

        Stops early when no cutoff ID can be determined, when a batch reports
        that nothing was archived, or when the global rotation lock cannot be
        acquired.
        """
        logger.debug('Attempting to rotate log entries')

        with UseThenDisconnect(app.config):
            stale_before = datetime.now() - STALE_AFTER
            cutoff_id = get_stale_logs_cutoff_id(stale_before, model)
            if cutoff_id is None:
                logger.warning('Failed to find cutoff id')
                return

        # Each batch is archived under the global lock; the lock is released
        # between batches.
        keep_going = True
        while keep_going:
            try:
                with GlobalLock('ACTION_LOG_ROTATION'):
                    keep_going = self._perform_archiving(cutoff_id, model)
            except LockNotAcquiredException:
                return

    def _perform_archiving(self, cutoff_id, model):
        """Archive and delete one batch of stale rows from ``model``.

        The batch covers the ID range starting at the stale start ID and
        spanning MIN_LOGS_PER_ROTATION IDs. The rows are serialized to
        gzip-compressed JSON, stored through DelegateUserfiles, and then
        deleted from the database.

        Returns True when a batch was archived and deleted; False when no
        start ID was found or too few stale rows exist.
        """
        # Pick the *same* save location for all instances. This is a fallback if
        # a location was not configured.
        destination = SAVE_LOCATION or storage.locations[0]
        log_archive = DelegateUserfiles(app, storage, destination, SAVE_PATH)

        with UseThenDisconnect(app.config):
            start_id = get_stale_logs_start_id(model)
            if start_id is None:
                logger.warning('Failed to find start id')
                return False

            logger.debug('Found starting ID %s and cutoff ID %s', start_id, cutoff_id)

            # The ID difference only approximates the row count.
            approx_count = cutoff_id - start_id
            if approx_count < MIN_LOGS_PER_ROTATION:
                logger.debug('Not enough stale logs to warrant rotation (approx %d)', approx_count)
                return False

            end_id = start_id + MIN_LOGS_PER_ROTATION
            stale_rows = get_stale_logs(start_id, end_id, model)
            logs = [log_dict(row) for row in stale_rows]

        logger.debug('Archiving logs from IDs %s to %s', start_id, end_id)
        with SpooledTemporaryFile(MEMORY_TEMPFILE_SIZE) as spool:
            with GzipFile('temp_action_log_rotate', fileobj=spool, compresslevel=1) as zipstream:
                for chunk in StreamingJSONEncoder().iterencode(logs):
                    zipstream.write(chunk)

            # The gzip stream is closed at this point; rewind the spool so the
            # upload reads the archive from its beginning.
            spool.seek(0)
            filename = '%d-%d-%s.txt.gz' % (start_id, end_id, model.__name__.lower())
            log_archive.store_file(spool, JSON_MIMETYPE, content_encoding='gzip',
                                   file_id=filename)
            logger.debug('Finished archiving logs from IDs %s to %s', start_id, end_id)

        # Rows are deleted only after the store call has returned.
        with UseThenDisconnect(app.config):
            logger.debug('Deleting logs from IDs %s to %s', start_id, end_id)
            delete_stale_logs(start_id, end_id, model)

        return True
|
2016-02-09 20:20:52 +00:00
|
|
|
|
2015-10-09 19:41:56 +00:00
|
|
|
|
2016-04-08 17:04:55 +00:00
|
|
|
def log_dict(log):
    """Build a JSON-serializable dictionary from a log entry.

    The entry's ``metadata_json`` is parsed as JSON. If parsing raises
    ValueError or TypeError, the failure is logged and the unparsed value is
    kept under the ``'__raw'`` key instead. ``datetime`` and ``ip`` are
    converted with ``str``; the ID fields are copied unchanged.
    """
    raw_metadata = log.metadata_json
    try:
        metadata_json = json.loads(str(raw_metadata))
    except (ValueError, TypeError):
        logger.exception('Could not parse metadata JSON for log entry %s', log.id)
        metadata_json = {'__raw': raw_metadata}

    # Fields copied verbatim from the entry.
    plain_fields = ('kind_id', 'account_id', 'performer_id', 'repository_id')
    entry = {name: getattr(log, name) for name in plain_fields}
    entry['datetime'] = str(log.datetime)
    entry['ip'] = str(log.ip)
    entry['metadata_json'] = metadata_json
    return entry
|
2015-10-09 19:41:56 +00:00
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Configure logging and run the log rotation worker.

    Logging is configured from the file returned by ``logfile_path``. When
    ``features.LOG_EXPORT`` is falsy no worker is created and the process
    sleeps forever; otherwise a ``LogRotateWorker`` is created and started.
    """
    # ``import logging`` does not load the ``logging.config`` submodule, so
    # import it explicitly instead of relying on another module having done so.
    import logging.config

    logging.config.fileConfig(logfile_path(debug=False), disable_existing_loggers=False)

    # NOTE(review): this worker rotates action logs but is gated on the
    # LOG_EXPORT feature flag -- confirm this is the intended flag.
    if not features.LOG_EXPORT:
        logger.debug('Log export not enabled; skipping')
        # Idle instead of exiting -- presumably so a process supervisor does
        # not keep restarting the worker; TODO confirm.
        while True:
            time.sleep(100000)

    worker = LogRotateWorker()
    worker.start()
|
|
|
|
|
|
|
|
|
|
|
|
# Run the worker only when this module is executed as a script.
if __name__ == "__main__":
    main()
|