This repository has been archived on 2020-03-24. You can view files and clone it, but cannot push or open issues or pull requests.
quay/data/model/log.py

127 lines
4.7 KiB
Python
Raw Normal View History

import json
from peewee import JOIN_LEFT_OUTER, SQL, fn
from datetime import datetime, timedelta, date
from cachetools import lru_cache
from data.database import LogEntry, LogEntryKind, User, db
# TODO: Find a way to get logs without slowing down pagination significantly.
def _logs_query(selections, start_time, end_time, performer=None, repository=None,
                namespace=None):
    """ Builds the base LogEntry query shared by the log lookups in this module.

        The query selects `selections` from LogEntry and keeps only entries whose datetime
        lies in the half-open range [start_time, end_time). `repository`, `performer` and
        `namespace` each add one more filter, but only when the argument is truthy; the
        namespace filter joins against User and compares its username.
    """
    in_range = (LogEntry.datetime >= start_time, LogEntry.datetime < end_time)
    query = LogEntry.select(*selections).switch(LogEntry).where(*in_range)

    if repository:
        query = query.where(LogEntry.repository == repository)

    if performer:
        query = query.where(LogEntry.performer == performer)

    if namespace:
        with_user = query.join(User)
        query = with_user.where(User.username == namespace)

    return query
@lru_cache(maxsize=1)
def get_log_entry_kinds():
    """ Returns a dict mapping the id of every LogEntryKind row to its name.

        The function is wrapped in lru_cache(maxsize=1), so repeated calls are served from
        the cache rather than re-running the select.
    """
    return {entry_kind.id: entry_kind.name for entry_kind in LogEntryKind.select()}
def get_aggregated_logs(start_time, end_time, performer=None, repository=None, namespace=None):
    """ Returns a query that counts the log entries matching the given filters, grouped by
        the day extracted from the entry's datetime and by the entry's kind.

        Each row selects the kind, the extracted day (aliased 'day') and the number of
        entries (aliased 'count'). Filtering is delegated to _logs_query.
    """
    day_of_entry = db.extract_date('day', LogEntry.datetime)
    entry_count = fn.Count(LogEntry.id)
    columns = [LogEntry.kind, day_of_entry.alias('day'), entry_count.alias('count')]

    filtered = _logs_query(columns, start_time, end_time, performer, repository, namespace)
    return filtered.group_by(day_of_entry, LogEntry.kind)
def list_logs(start_time, end_time, performer=None, repository=None, namespace=None, page=None,
              count=None):
    """ Returns a list of the log entries matching the given filters, newest first.

        Each entry is selected together with its performer, which is brought in through a
        left outer join on an alias of User, so entries without a performer are still
        returned. Pagination is applied only when both `page` and `count` are truthy.
    """
    performer_alias = User.alias()
    columns = [LogEntry, performer_alias]
    filtered = _logs_query(columns, start_time, end_time, performer, repository, namespace)

    on_performer = (LogEntry.performer == performer_alias.id).alias('performer')
    with_performer = filtered.switch(LogEntry).join(performer_alias, JOIN_LEFT_OUTER,
                                                    on=on_performer)

    if page and count:
        with_performer = with_performer.paginate(page, count)

    newest_first = with_performer.order_by(LogEntry.datetime.desc())
    return list(newest_first)
def log_action(kind_name, user_or_organization_name, performer=None, repository=None,
               ip=None, metadata=None, timestamp=None):
    """ Creates a LogEntry of the named kind for the named user or organization.

        kind_name: name of the LogEntryKind to record; looked up with LogEntryKind.get.
        user_or_organization_name: username of the User row stored as the entry's account.
        performer, repository, ip: passed straight through to LogEntry.create.
        metadata: JSON-serializable value stored in metadata_json; an empty dict is stored
          when it is omitted or None.
        timestamp: datetime for the entry; defaults to datetime.today() when falsy.

        Whatever the two `get` lookups raise for an unknown kind or account propagates to
        the caller.
    """
    # The default is None rather than a dict literal: a literal default is created once and
    # shared by every call, so any later mutation would leak between callers.
    if metadata is None:
        metadata = {}

    if not timestamp:
        timestamp = datetime.today()

    kind = LogEntryKind.get(LogEntryKind.name == kind_name)
    account = User.get(User.username == user_or_organization_name)
    LogEntry.create(kind=kind, account=account, performer=performer,
                    repository=repository, ip=ip, metadata_json=json.dumps(metadata),
                    datetime=timestamp)
def _get_repository_events(repository, time_delta, time_delta_earlier, clause):
    """ Returns a pair representing the count of the number of events for the given
        repository in each of the specified time deltas. The date ranges are calculated by
        taking today's date and subtracting the time delta given. Since we want to grab
        *two* ranges, we require the second range to start strictly earlier than the first,
        so we can conduct the lookup in a single query. The clause is used to further
        filter the kind of events being found.

        The first element counts events on or after (today - time_delta); the second counts
        events on or after (today - time_delta_earlier). A missing or zero aggregate is
        reported as 0.

        Raises ValueError if time_delta_earlier does not reach further back than time_delta.
    """
    # Read the clock once so both boundaries are measured from the same day, even if the
    # call happens to straddle midnight.
    today = date.today()
    since = today - time_delta
    since_earlier = today - time_delta_earlier

    if since_earlier >= since:
        raise ValueError('time_delta_earlier must be greater than time_delta')

    # This uses a CASE WHEN inner clause to further filter the count. The interpolated value
    # is a date formatted here, not caller-supplied text.
    formatted = since.strftime('%Y-%m-%d')
    case_query = 'CASE WHEN datetime >= \'%s\' THEN 1 ELSE 0 END' % formatted

    result = (LogEntry
              .select(fn.Sum(SQL(case_query)), fn.Count(SQL('*')))
              .where(LogEntry.repository == repository)
              .where(clause)
              .where(LogEntry.datetime >= since_earlier)
              .tuples()
              .get())

    recent_count, total_count = result[0], result[1]
    return (int(recent_count) if recent_count else 0, int(total_count) if total_count else 0)
def get_repository_pushes(repository, time_delta, time_delta_earlier):
    """ Returns the pair of event counts produced by _get_repository_events for the given
        repository, restricted to log entries of kind 'push_repo'.
    """
    push_kind = LogEntryKind.get(name='push_repo')
    is_push = (LogEntry.kind == push_kind)
    return _get_repository_events(repository, time_delta, time_delta_earlier, is_push)
def get_repository_pulls(repository, time_delta, time_delta_earlier):
    """ Returns the pair of event counts produced by _get_repository_events for the given
        repository, restricted to log entries of kind 'pull_repo' or 'repo_verb'.
    """
    pull_kind = LogEntryKind.get(name='pull_repo')
    verb_kind = LogEntryKind.get(name='repo_verb')

    is_pull = (LogEntry.kind == pull_kind)
    is_verb = (LogEntry.kind == verb_kind)
    return _get_repository_events(repository, time_delta, time_delta_earlier,
                                  is_pull | is_verb)
def get_repository_usage():
    """ Returns the count of a query over 'pull_repo' and 'repo_verb' log entries from the
        last four weeks that have a repository set, grouped by (ip, repository).
    """
    cutoff = date.today() - timedelta(weeks=4)

    pull_kind = LogEntryKind.get(name='pull_repo')
    verb_kind = LogEntryKind.get(name='repo_verb')

    is_pull_event = (LogEntry.kind == pull_kind) | (LogEntry.kind == verb_kind)
    # `>> None` is peewee's IS NULL test; negating it keeps rows that have a repository.
    has_repository = ~(LogEntry.repository >> None)

    grouped = (LogEntry
               .select(LogEntry.ip, LogEntry.repository)
               .where(is_pull_event)
               .where(has_repository)
               .where(LogEntry.datetime >= cutoff)
               .group_by(LogEntry.ip, LogEntry.repository))
    return grouped.count()