Make the repository API faster by querying the log entries table only once for each kind of entry, rather than twice. We make use of a special subquery-like syntax (a CASE WHEN expression inside the aggregate), which allows us to count both the entries that are at most 30 days old and the entries that are at most 1 day old in the same query. This was tested successfully on MySQL, Postgres and SQLite.

This commit is contained in:
Joseph Schorr 2015-05-07 22:49:11 -04:00
parent 469f25b64c
commit c767aafcd6
2 changed files with 39 additions and 22 deletions

View file

@ -19,7 +19,7 @@ from data.database import (User, Repository, Image, AccessToken, Role, Repositor
db, BUILD_PHASE, QuayUserField, ImageStorageSignature, QueueItem, db, BUILD_PHASE, QuayUserField, ImageStorageSignature, QueueItem,
ImageStorageSignatureKind, validate_database_url, db_for_update, ImageStorageSignatureKind, validate_database_url, db_for_update,
AccessTokenKind, Star, get_epoch_timestamp, RepositoryActionCount) AccessTokenKind, Star, get_epoch_timestamp, RepositoryActionCount)
from peewee import JOIN_LEFT_OUTER, fn from peewee import JOIN_LEFT_OUTER, fn, SQL
from util.validation import (validate_username, validate_email, validate_password, from util.validation import (validate_username, validate_email, validate_password,
INVALID_PASSWORD_MESSAGE) INVALID_PASSWORD_MESSAGE)
from util.names import format_robot_username, parse_robot_username from util.names import format_robot_username, parse_robot_username
@ -2777,24 +2777,38 @@ def cancel_repository_build(build, work_queue):
build.delete_instance() build.delete_instance()
return True return True
def get_repository_pushes(repository, time_delta): def _get_repository_events(repository, time_delta, time_delta_earlier, clause):
since = date.today() - time_delta since = date.today() - time_delta
push_repo = LogEntryKind.get(name = 'push_repo') since_earlier = date.today() - time_delta_earlier
return (LogEntry.select()
.where(LogEntry.repository == repository)
.where(LogEntry.kind == push_repo)
.where(LogEntry.datetime >= since)
.count())
def get_repository_pulls(repository, time_delta): if since_earlier >= since:
since = date.today() - time_delta raise Exception('time_delta_earlier must be greater than time_delta')
# This uses a CASE WHEN inner clause to further filter the count.
formatted = since.strftime('%Y-%m-%d')
case_query = 'CASE WHEN datetime >= \'%s\' THEN 1 ELSE 0 END' % formatted
result = (LogEntry.select(fn.Sum(SQL(case_query)), fn.Count(SQL('*')))
.where(LogEntry.repository == repository)
.where(clause)
.where(LogEntry.datetime >= since_earlier)
.tuples()
.get())
return (result[0] or 0, result[1] or 0)
def get_repository_pushes(repository, time_delta, time_delta_earlier):
push_repo = LogEntryKind.get(name = 'push_repo')
clauses = (LogEntry.kind == push_repo)
return _get_repository_events(repository, time_delta, time_delta_earlier, clauses)
def get_repository_pulls(repository, time_delta, time_delta_earlier):
repo_pull = LogEntryKind.get(name = 'pull_repo') repo_pull = LogEntryKind.get(name = 'pull_repo')
repo_verb = LogEntryKind.get(name = 'repo_verb') repo_verb = LogEntryKind.get(name = 'repo_verb')
return (LogEntry.select() clauses = ((LogEntry.kind == repo_pull) | (LogEntry.kind == repo_verb))
.where(LogEntry.repository == repository) return _get_repository_events(repository, time_delta, time_delta_earlier, clauses)
.where((LogEntry.kind == repo_pull) | (LogEntry.kind == repo_verb))
.where(LogEntry.datetime >= since)
.count())
def get_repository_usage(): def get_repository_usage():

View file

@ -194,13 +194,17 @@ class Repository(RepositoryParamResource):
tag_dict = {tag.name: tag_view(tag) for tag in tags} tag_dict = {tag.name: tag_view(tag) for tag in tags}
can_write = ModifyRepositoryPermission(namespace, repository).can() can_write = ModifyRepositoryPermission(namespace, repository).can()
can_admin = AdministerRepositoryPermission(namespace, repository).can() can_admin = AdministerRepositoryPermission(namespace, repository).can()
active_builds = model.list_repository_builds(namespace, repository, 1,
include_inactive=False)
is_starred = (model.repository_is_starred(get_authenticated_user(), repo) is_starred = (model.repository_is_starred(get_authenticated_user(), repo)
if get_authenticated_user() else False) if get_authenticated_user() else False)
is_public = model.is_repository_public(repo) is_public = model.is_repository_public(repo)
(pull_today, pull_thirty_day) = model.get_repository_pulls(repo, timedelta(days=1),
timedelta(days=30))
(push_today, push_thirty_day) = model.get_repository_pushes(repo, timedelta(days=1),
timedelta(days=30))
return { return {
'namespace': namespace, 'namespace': namespace,
'name': repository, 'name': repository,
@ -209,18 +213,17 @@ class Repository(RepositoryParamResource):
'can_write': can_write, 'can_write': can_write,
'can_admin': can_admin, 'can_admin': can_admin,
'is_public': is_public, 'is_public': is_public,
'is_building': len(list(active_builds)) > 0,
'is_organization': repo.namespace_user.organization, 'is_organization': repo.namespace_user.organization,
'is_starred': is_starred, 'is_starred': is_starred,
'status_token': repo.badge_token if not is_public else '', 'status_token': repo.badge_token if not is_public else '',
'stats': { 'stats': {
'pulls': { 'pulls': {
'today': model.get_repository_pulls(repo, timedelta(days=1)), 'today': pull_today,
'thirty_day': model.get_repository_pulls(repo, timedelta(days=30)) 'thirty_day': pull_thirty_day
}, },
'pushes': { 'pushes': {
'today': model.get_repository_pushes(repo, timedelta(days=1)), 'today': push_today,
'thirty_day': model.get_repository_pushes(repo, timedelta(days=30)) 'thirty_day': push_thirty_day
} }
} }
} }