From c767aafcd606f11438adb673b61e2d3c012ff856 Mon Sep 17 00:00:00 2001
From: Joseph Schorr <joseph.schorr@coreos.com>
Date: Thu, 7 May 2015 22:49:11 -0400
Subject: [PATCH] Make the repository API faster by only checking the log
 entries table once for each kind of entry, rather than twice. We make use of
 a special subquery-like syntax, which allows us to count those entries that
 are both 30 days only and 1 day old in the same query. This was tested
 successfully on MySQL, Postgres and Sqlite.

---
 data/model/legacy.py        | 44 ++++++++++++++++++++++++-------------
 endpoints/api/repository.py | 17 ++++++++------
 2 files changed, 39 insertions(+), 22 deletions(-)

diff --git a/data/model/legacy.py b/data/model/legacy.py
index dd6a10121..94386cb53 100644
--- a/data/model/legacy.py
+++ b/data/model/legacy.py
@@ -19,7 +19,7 @@ from data.database import (User, Repository, Image, AccessToken, Role, Repositor
                            db, BUILD_PHASE, QuayUserField, ImageStorageSignature, QueueItem,
                            ImageStorageSignatureKind, validate_database_url, db_for_update,
                            AccessTokenKind, Star, get_epoch_timestamp, RepositoryActionCount)
-from peewee import JOIN_LEFT_OUTER, fn
+from peewee import JOIN_LEFT_OUTER, fn, SQL
 from util.validation import (validate_username, validate_email, validate_password,
                              INVALID_PASSWORD_MESSAGE)
 from util.names import format_robot_username, parse_robot_username
@@ -2777,24 +2777,38 @@ def cancel_repository_build(build, work_queue):
     build.delete_instance()
     return True
 
-def get_repository_pushes(repository, time_delta):
+def _get_repository_events(repository, time_delta, time_delta_earlier, clause):
   since = date.today() - time_delta
-  push_repo = LogEntryKind.get(name = 'push_repo')
-  return (LogEntry.select()
-                  .where(LogEntry.repository == repository)
-                  .where(LogEntry.kind == push_repo)
-                  .where(LogEntry.datetime >= since)
-                  .count())
+  since_earlier = date.today() - time_delta_earlier
 
-def get_repository_pulls(repository, time_delta):
-  since = date.today() - time_delta
+  if since_earlier >= since:
+    raise Exception('time_delta_earlier must be greater than time_delta')
+
+  # This uses a CASE WHEN inner clause to further filter the count.
+  formatted = since.strftime('%Y-%m-%d')
+  case_query = 'CASE WHEN datetime >= \'%s\' THEN 1 ELSE 0 END' % formatted
+
+  result = (LogEntry.select(fn.Sum(SQL(case_query)), fn.Count(SQL('*')))
+                    .where(LogEntry.repository == repository)
+                    .where(clause)
+                    .where(LogEntry.datetime >= since_earlier)
+                    .tuples()
+                    .get())
+
+  return (result[0] or 0, result[1] or 0)
+
+
+def get_repository_pushes(repository, time_delta, time_delta_earlier):
+  push_repo = LogEntryKind.get(name = 'push_repo')
+  clauses = (LogEntry.kind == push_repo)
+  return _get_repository_events(repository, time_delta, time_delta_earlier, clauses)
+
+
+def get_repository_pulls(repository, time_delta, time_delta_earlier):
   repo_pull = LogEntryKind.get(name = 'pull_repo')
   repo_verb = LogEntryKind.get(name = 'repo_verb')
-  return (LogEntry.select()
-                  .where(LogEntry.repository == repository)
-                  .where((LogEntry.kind == repo_pull) | (LogEntry.kind == repo_verb))
-                  .where(LogEntry.datetime >= since)
-                  .count())
+  clauses = ((LogEntry.kind == repo_pull) | (LogEntry.kind == repo_verb))
+  return _get_repository_events(repository, time_delta, time_delta_earlier, clauses)
 
 
 def get_repository_usage():
diff --git a/endpoints/api/repository.py b/endpoints/api/repository.py
index c66a294ad..bbc1b49de 100644
--- a/endpoints/api/repository.py
+++ b/endpoints/api/repository.py
@@ -194,13 +194,17 @@ class Repository(RepositoryParamResource):
       tag_dict = {tag.name: tag_view(tag) for tag in tags}
       can_write = ModifyRepositoryPermission(namespace, repository).can()
       can_admin = AdministerRepositoryPermission(namespace, repository).can()
-      active_builds = model.list_repository_builds(namespace, repository, 1,
-                                                   include_inactive=False)
 
       is_starred = (model.repository_is_starred(get_authenticated_user(), repo)
                     if get_authenticated_user() else False)
       is_public = model.is_repository_public(repo)
 
+      (pull_today, pull_thirty_day) = model.get_repository_pulls(repo, timedelta(days=1),
+                                                                 timedelta(days=30))
+
+      (push_today, push_thirty_day) = model.get_repository_pushes(repo, timedelta(days=1),
+                                                                  timedelta(days=30))
+
       return {
         'namespace': namespace,
         'name': repository,
@@ -209,18 +213,17 @@ class Repository(RepositoryParamResource):
         'can_write': can_write,
         'can_admin': can_admin,
         'is_public': is_public,
-        'is_building': len(list(active_builds)) > 0,
         'is_organization': repo.namespace_user.organization,
         'is_starred': is_starred,
         'status_token': repo.badge_token if not is_public else '',
         'stats': {
           'pulls': {
-            'today': model.get_repository_pulls(repo, timedelta(days=1)),
-            'thirty_day': model.get_repository_pulls(repo, timedelta(days=30))
+            'today': pull_today,
+            'thirty_day': pull_thirty_day
           },
           'pushes': {
-            'today': model.get_repository_pushes(repo, timedelta(days=1)),
-            'thirty_day': model.get_repository_pushes(repo, timedelta(days=30))
+            'today': push_today,
+            'thirty_day': push_thirty_day
           }
         }
       }