Make the repository API faster by only checking the log entries table once for each kind of entry, rather than twice. We make use of a special subquery-like syntax, which allows us to count those entries that are both 30 days only and 1 day old in the same query. This was tested successfully on MySQL, Postgres and Sqlite.

This commit is contained in:
Joseph Schorr 2015-05-07 22:49:11 -04:00
parent 469f25b64c
commit c767aafcd6
2 changed files with 39 additions and 22 deletions

View file

@ -19,7 +19,7 @@ from data.database import (User, Repository, Image, AccessToken, Role, Repositor
db, BUILD_PHASE, QuayUserField, ImageStorageSignature, QueueItem,
ImageStorageSignatureKind, validate_database_url, db_for_update,
AccessTokenKind, Star, get_epoch_timestamp, RepositoryActionCount)
from peewee import JOIN_LEFT_OUTER, fn
from peewee import JOIN_LEFT_OUTER, fn, SQL
from util.validation import (validate_username, validate_email, validate_password,
INVALID_PASSWORD_MESSAGE)
from util.names import format_robot_username, parse_robot_username
@ -2777,24 +2777,38 @@ def cancel_repository_build(build, work_queue):
build.delete_instance()
return True
def get_repository_pushes(repository, time_delta):
def _get_repository_events(repository, time_delta, time_delta_earlier, clause):
since = date.today() - time_delta
push_repo = LogEntryKind.get(name = 'push_repo')
return (LogEntry.select()
.where(LogEntry.repository == repository)
.where(LogEntry.kind == push_repo)
.where(LogEntry.datetime >= since)
.count())
since_earlier = date.today() - time_delta_earlier
def get_repository_pulls(repository, time_delta):
since = date.today() - time_delta
if since_earlier >= since:
raise Exception('time_delta_earlier must be greater than time_delta')
# This uses a CASE WHEN inner clause to further filter the count.
formatted = since.strftime('%Y-%m-%d')
case_query = 'CASE WHEN datetime >= \'%s\' THEN 1 ELSE 0 END' % formatted
result = (LogEntry.select(fn.Sum(SQL(case_query)), fn.Count(SQL('*')))
.where(LogEntry.repository == repository)
.where(clause)
.where(LogEntry.datetime >= since_earlier)
.tuples()
.get())
return (result[0] or 0, result[1] or 0)
def get_repository_pushes(repository, time_delta, time_delta_earlier):
push_repo = LogEntryKind.get(name = 'push_repo')
clauses = (LogEntry.kind == push_repo)
return _get_repository_events(repository, time_delta, time_delta_earlier, clauses)
def get_repository_pulls(repository, time_delta, time_delta_earlier):
repo_pull = LogEntryKind.get(name = 'pull_repo')
repo_verb = LogEntryKind.get(name = 'repo_verb')
return (LogEntry.select()
.where(LogEntry.repository == repository)
.where((LogEntry.kind == repo_pull) | (LogEntry.kind == repo_verb))
.where(LogEntry.datetime >= since)
.count())
clauses = ((LogEntry.kind == repo_pull) | (LogEntry.kind == repo_verb))
return _get_repository_events(repository, time_delta, time_delta_earlier, clauses)
def get_repository_usage():