2015-07-15 21:25:41 +00:00
|
|
|
import json
|
2017-02-15 20:44:08 +00:00
|
|
|
import logging
|
2015-07-15 21:25:41 +00:00
|
|
|
|
2016-04-01 17:55:29 +00:00
|
|
|
from calendar import timegm
|
2017-02-15 20:44:08 +00:00
|
|
|
from peewee import JOIN_LEFT_OUTER, fn, PeeweeException
|
2016-08-12 20:53:17 +00:00
|
|
|
from datetime import datetime, timedelta
|
2015-08-05 21:36:17 +00:00
|
|
|
from cachetools import lru_cache
|
2015-07-15 21:25:41 +00:00
|
|
|
|
2016-06-22 18:50:59 +00:00
|
|
|
from data.database import LogEntry, LogEntryKind, User, RepositoryActionCount, db
|
2016-08-12 20:53:17 +00:00
|
|
|
from data.model import config, user, DataModelException
|
2015-07-15 21:25:41 +00:00
|
|
|
|
2017-02-15 20:44:08 +00:00
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
|
|
|
|
2017-04-07 15:39:54 +00:00
|
|
|
# Log entry kind names for which log_action may log a write failure instead of raising it,
# provided the ALLOW_PULLS_WITHOUT_STRICT_LOGGING config flag is set.
ACTIONS_ALLOWED_WITHOUT_AUDIT_LOGGING = ['pull_repo']
|
|
|
|
|
2016-04-26 19:16:55 +00:00
|
|
|
def _logs_query(selections, start_time, end_time, performer=None, repository=None, namespace=None,
                ignore=None):
    """ Builds the LogEntry query shared by the log lookup functions in this module.

    Entries are restricted to start_time <= datetime < end_time, and each optional filter is
    applied only when its argument is truthy.

    Args:
      selections: columns/models passed to LogEntry.select().
      start_time: inclusive lower bound on LogEntry.datetime.
      end_time: exclusive upper bound on LogEntry.datetime.
      performer: if given, only entries with this performer are kept.
      repository: if given, only entries for this repository are kept.
      namespace: if given, name of the user or organization whose entries are kept.
      ignore: if given, iterable of log entry kind names to exclude.

    Returns:
      The filtered peewee query.

    Raises:
      DataModelException: if `namespace` does not resolve to a user or organization.
      KeyError: if a name in `ignore` is not a known log entry kind.
    """
    base = LogEntry.select(*selections).switch(LogEntry)
    query = base.where(LogEntry.datetime >= start_time, LogEntry.datetime < end_time)

    if repository:
        query = query.where(LogEntry.repository == repository)

    if performer:
        query = query.where(LogEntry.performer == performer)

    if namespace:
        owner = user.get_user_or_org(namespace)
        if owner is None:
            raise DataModelException('Invalid namespace requested')

        query = query.where(LogEntry.account == owner.id)

    if ignore:
        # Translate the kind names into ids before excluding them from the query.
        kinds = get_log_entry_kinds()
        excluded_ids = []
        for excluded_name in ignore:
            excluded_ids.append(kinds[excluded_name])

        query = query.where(~(LogEntry.kind << excluded_ids))

    return query
|
|
|
|
|
|
|
|
|
2015-08-05 21:36:17 +00:00
|
|
|
@lru_cache(maxsize=1)
def get_log_entry_kinds():
    """ Returns a dict describing every LogEntryKind row in both directions.

    Each kind contributes two entries: id -> name and name -> id. The function is wrapped in
    lru_cache(maxsize=1).
    """
    lookup = {}
    for entry_kind in LogEntryKind.select():
        # The id key is inserted before the name key, as two separate mappings.
        lookup.update({entry_kind.id: entry_kind.name, entry_kind.name: entry_kind.id})

    return lookup
|
|
|
|
|
|
|
|
|
2016-05-13 19:20:55 +00:00
|
|
|
def _get_log_entry_kind(name):
    """ Looks up `name` in the map returned by get_log_entry_kinds().

    Raises KeyError when the map has no such key.
    """
    return get_log_entry_kinds()[name]
|
|
|
|
|
|
|
|
|
2016-04-26 19:16:55 +00:00
|
|
|
def get_aggregated_logs(start_time, end_time, performer=None, repository=None, namespace=None,
                        ignore=None):
    """ Returns a query counting log entries per (day, kind) group.

    The filter arguments are forwarded unchanged to _logs_query. Each result row selects the
    entry kind, the value of db.extract_date('day', LogEntry.datetime) aliased as 'day', and the
    number of entries aliased as 'count'.
    """
    day = db.extract_date('day', LogEntry.datetime)
    entry_count = fn.Count(LogEntry.id)
    columns = [LogEntry.kind, day.alias('day'), entry_count.alias('count')]

    filtered = _logs_query(columns, start_time, end_time, performer=performer,
                           repository=repository, namespace=namespace, ignore=ignore)
    return filtered.group_by(day, LogEntry.kind)
|
2015-07-31 17:38:02 +00:00
|
|
|
|
|
|
|
|
2016-04-26 19:16:55 +00:00
|
|
|
def get_logs_query(start_time, end_time, performer=None, repository=None, namespace=None,
                   ignore=None):
    """ Returns a query for log entries joined with the users they refer to.

    The filter arguments are forwarded unchanged to _logs_query. The performer is always
    left-outer-joined (aliased 'performer'). The account is selected and left-outer-joined
    (aliased 'account') only when neither `namespace` nor `repository` is given.
    """
    performer_alias = User.alias()
    account_alias = User.alias()

    # The account is only joined for lookups that are not scoped to a namespace or repository.
    include_account = namespace is None and repository is None

    columns = [LogEntry, performer_alias]
    if include_account:
        columns.append(account_alias)

    query = _logs_query(columns, start_time, end_time, performer, repository, namespace, ignore)

    performer_match = (LogEntry.performer == performer_alias.id).alias('performer')
    query = query.switch(LogEntry).join(performer_alias, JOIN_LEFT_OUTER, on=performer_match)

    if include_account:
        account_match = (LogEntry.account == account_alias.id).alias('account')
        query = query.switch(LogEntry).join(account_alias, JOIN_LEFT_OUTER, on=account_match)

    return query
|
2015-07-15 21:25:41 +00:00
|
|
|
|
|
|
|
|
2016-04-01 17:55:29 +00:00
|
|
|
def _json_serialize(obj):
    """ Fallback serializer handed to json.dumps as its `default` hook.

    A datetime becomes integer epoch seconds computed from its UTC time tuple; any other object
    is returned unchanged.
    """
    return timegm(obj.utctimetuple()) if isinstance(obj, datetime) else obj
|
|
|
|
|
|
|
|
|
2015-07-15 21:25:41 +00:00
|
|
|
def log_action(kind_name, user_or_organization_name, performer=None, repository=None,
               ip=None, metadata={}, timestamp=None):
    """ Records a LogEntry row describing an action.

    Args:
      kind_name: name of the log entry kind; resolved to its id through the kind map, so an
        unknown name raises KeyError.
      user_or_organization_name: username of the account the entry is filed under. When None,
        the SERVICE_LOG_ACCOUNT_ID config value is used, falling back to the smallest User id.
      performer: optional object whose `id` is stored as the entry's performer.
      repository: optional object whose `id` is stored as the entry's repository.
      ip: optional value stored in the entry's `ip` field.
      metadata: data serialized to JSON for the entry (datetimes go through _json_serialize).
        The shared default dict is only read here, never mutated.
      timestamp: datetime for the entry; datetime.today() is used when falsy.

    Raises:
      PeeweeException: if creating the entry fails, unless ALLOW_PULLS_WITHOUT_STRICT_LOGGING is
        enabled and `kind_name` is listed in ACTIONS_ALLOWED_WITHOUT_AUDIT_LOGGING; in that case
        the failure is logged and swallowed.
    """
    if not timestamp:
        timestamp = datetime.today()

    account = None
    if user_or_organization_name is not None:
        account = User.get(User.username == user_or_organization_name).id
    else:
        account = config.app_config.get('SERVICE_LOG_ACCOUNT_ID')
        if account is None:
            # No service account configured: fall back to the user with the lowest id.
            account = User.select(fn.Min(User.id)).tuples().get()[0]

    if performer is not None:
        performer = performer.id

    if repository is not None:
        repository = repository.id

    kind = _get_log_entry_kind(kind_name)
    metadata_json = json.dumps(metadata, default=_json_serialize)

    log_data = {
        'kind': kind,
        'account': account,
        'performer': performer,
        'repository': repository,
        'ip': ip,
        'metadata_json': metadata_json,
        'datetime': timestamp,
    }

    try:
        LogEntry.create(**log_data)
    except PeeweeException as ex:
        strict_logging_disabled = config.app_config.get('ALLOW_PULLS_WITHOUT_STRICT_LOGGING')
        if not (strict_logging_disabled and kind_name in ACTIONS_ALLOWED_WITHOUT_AUDIT_LOGGING):
            raise

        # dict.update() returns None, so the context dict has to be built before it is passed;
        # chaining .update() inside the call would hand `extra=None` to the logger and drop
        # the entry data from the log record.
        extra = {'exception': ex}
        extra.update(log_data)
        logger.exception('log_action failed', extra=extra)
|
|
|
|
|
2015-07-15 21:25:41 +00:00
|
|
|
|
|
|
|
|
2015-10-09 19:41:56 +00:00
|
|
|
def get_stale_logs_start_id():
    """ Gets the id of the log entry with the lowest id, or None if no row is returned. """
    try:
        lowest_first = LogEntry.select(LogEntry.id).order_by(LogEntry.id)
        rows = lowest_first.limit(1).tuples()
        return rows[0][0]
    except IndexError:
        return None
|
|
|
|
|
|
|
|
|
2016-02-09 20:20:52 +00:00
|
|
|
def get_stale_logs_cutoff_id(cutoff_date):
    """ Gets the highest log entry id whose datetime is at or before cutoff_date.

    Returns None if the query yields no row.
    """
    try:
        max_id = LogEntry.select(fn.Max(LogEntry.id))
        rows = max_id.where(LogEntry.datetime <= cutoff_date).tuples()
        return rows[0][0]
    except IndexError:
        return None
|
|
|
|
|
|
|
|
|
|
|
|
def get_stale_logs(start_id, end_id):
    """ Returns a query for all logs whose id lies in [start_id, end_id], both ends included. """
    at_or_after_start = (LogEntry.id >= start_id)
    at_or_before_end = (LogEntry.id <= end_id)
    return LogEntry.select().where(at_or_after_start, at_or_before_end)
|
|
|
|
|
|
|
|
|
|
|
|
def delete_stale_logs(start_id, end_id):
    """ Deletes all logs whose id lies in [start_id, end_id], both ends included. """
    at_or_after_start = (LogEntry.id >= start_id)
    at_or_before_end = (LogEntry.id <= end_id)
    deletion = LogEntry.delete().where(at_or_after_start, at_or_before_end)
    deletion.execute()
|
2016-06-22 18:50:59 +00:00
|
|
|
|
|
|
|
|
|
|
|
def get_repository_action_counts(repo, start_date):
    """ Returns a query for the RepositoryActionCount rows of `repo` dated on or after
    start_date.
    """
    for_repo = RepositoryActionCount.repository == repo
    since_start = RepositoryActionCount.date >= start_date
    return RepositoryActionCount.select().where(for_repo, since_start)
|
|
|
|
|
|
|
|
|
|
|
|
def get_repositories_action_sums(repository_ids):
    """ Returns a dict mapping repository to its summed action count over the past week.

    Only repositories listed in `repository_ids` that have RepositoryActionCount rows dated
    within the last seven days appear in the result. An empty or None `repository_ids` yields
    an empty dict without running a query.
    """
    if not repository_ids:
        return {}

    # Only rows from the past week contribute to the sums.
    week_ago = datetime.now() - timedelta(weeks=1)
    summed = RepositoryActionCount.select(RepositoryActionCount.repository,
                                          fn.Sum(RepositoryActionCount.count))
    rows = (summed
            .where(RepositoryActionCount.repository << repository_ids)
            .where(RepositoryActionCount.date >= week_ago)
            .group_by(RepositoryActionCount.repository)
            .tuples())

    return {row[0]: row[1] for row in rows}
|