Add a RepositoryActionCount table so we can use it (instead of LogEntry) when scoring repo search results

2015-04-13 13:31:07 -04:00 · 2015-04-13 13:31:07 -04:00 · 3f1e8f3c27
commit 3f1e8f3c27
parent 703f48f194
8 changed files with 137 additions and 19 deletions
--- a/data/database.py
+++ b/data/database.py
@ -299,7 +299,7 @@ class Repository(BaseModel):
    # Therefore, we define our own deletion order here and use the dependency system to verify it.
    ordered_dependencies = [RepositoryAuthorizedEmail, RepositoryTag, Image, LogEntry,
                            RepositoryBuild, RepositoryBuildTrigger, RepositoryNotification,
-                            RepositoryPermission, AccessToken, Star]
+                            RepositoryPermission, AccessToken, Star, RepositoryActionCount]

    for query, fk in self.dependencies(search_nullable=True):
      model = fk.model_class
@ -560,6 +560,20 @@ class LogEntry(BaseModel):
  metadata_json = TextField(default='{}')


+class RepositoryActionCount(BaseModel):
+  """ Aggregated action count for a single repository on a single date. Added so that
+      repository search scoring can sum these rows instead of counting LogEntry rows.
+  """
+  # Repository the count belongs to (indexed foreign key).
+  repository = ForeignKeyField(Repository, index=True)
+  # Number of actions for `repository` on `date`; how rows get populated is not shown here.
+  count = IntegerField()
+  # Calendar date the count applies to (a date, not a datetime).
+  date = DateField(index=True)
+
+  class Meta:
+    database = db
+    read_slaves = (read_slave,)
+    indexes = (
+      # Unique composite index: at most one row per (repository, date) pair.
+      (('repository', 'date'), True),
+    )
+
+
 class OAuthApplication(BaseModel):
  client_id = CharField(index=True, default=random_string_generator(length=20))
  client_secret = CharField(default=random_string_generator(length=40))
@ -645,4 +659,4 @@ all_models = [User, Repository, Image, AccessToken, Role, RepositoryPermission,
              ExternalNotificationEvent, ExternalNotificationMethod, RepositoryNotification,
              RepositoryAuthorizedEmail, ImageStorageTransformation, DerivedImageStorage,
              TeamMemberInvite, ImageStorageSignature, ImageStorageSignatureKind,
-              AccessTokenKind, Star]
+              AccessTokenKind, Star, RepositoryActionCount]
--- a/data/migrations/versions/30c044b75632_add_repositoryactioncount_table.py
+++ b/data/migrations/versions/30c044b75632_add_repositoryactioncount_table.py
@ -0,0 +1,36 @@
+"""Add RepositoryActionCount table
+
+Revision ID: 30c044b75632
+Revises: 2b4dc0818a5e
+Create Date: 2015-04-13 13:21:18.159602
+
+"""
+
+# revision identifiers, used by Alembic.
+revision = '30c044b75632'
+down_revision = '2b4dc0818a5e'
+
+from alembic import op
+import sqlalchemy as sa
+
+
+def upgrade(tables):
+    """Create the `repositoryactioncount` table and its three indexes.
+
+    The `tables` argument is not used by this migration.
+    """
+    ### commands auto generated by Alembic - please adjust! ###
+    op.create_table('repositoryactioncount',
+    sa.Column('id', sa.Integer(), nullable=False),
+    sa.Column('repository_id', sa.Integer(), nullable=False),
+    sa.Column('count', sa.Integer(), nullable=False),
+    sa.Column('date', sa.Date(), nullable=False),
+    sa.ForeignKeyConstraint(['repository_id'], ['repository.id'], name=op.f('fk_repositoryactioncount_repository_id_repository')),
+    sa.PrimaryKeyConstraint('id', name=op.f('pk_repositoryactioncount'))
+    )
+    # Non-unique single-column indexes on `date` and `repository_id`.
+    op.create_index('repositoryactioncount_date', 'repositoryactioncount', ['date'], unique=False)
+    op.create_index('repositoryactioncount_repository_id', 'repositoryactioncount', ['repository_id'], unique=False)
+    # Unique composite index: allows only one row per (repository_id, date) pair.
+    op.create_index('repositoryactioncount_repository_id_date', 'repositoryactioncount', ['repository_id', 'date'], unique=True)
+    ### end Alembic commands ###
+
+
+def downgrade(tables):
+    """Drop the `repositoryactioncount` table created by `upgrade`.
+
+    The `tables` argument is not used by this migration.
+    """
+    ### commands auto generated by Alembic - please adjust! ###
+    # NOTE(review): no explicit drop_index calls; presumably the indexes are removed together
+    # with the table -- confirm for every supported database backend.
+    op.drop_table('repositoryactioncount')
+    ### end Alembic commands ###
--- a/data/model/legacy.py
+++ b/data/model/legacy.py
@ -18,7 +18,7 @@ from data.database import (User, Repository, Image, AccessToken, Role, Repositor
                           DerivedImageStorage, ImageStorageTransformation, random_string_generator,
                           db, BUILD_PHASE, QuayUserField, ImageStorageSignature, QueueItem,
                           ImageStorageSignatureKind, validate_database_url, db_for_update,
-                           AccessTokenKind, Star, get_epoch_timestamp)
+                           AccessTokenKind, Star, get_epoch_timestamp, RepositoryActionCount)
 from peewee import JOIN_LEFT_OUTER, fn
 from util.validation import (validate_username, validate_email, validate_password,
                             INVALID_PASSWORD_MESSAGE)
@ -995,20 +995,19 @@ def get_sorted_matching_repositories(prefix, only_public, checker, limit=10):
  """ Returns repositories matching the given prefix string and passing the given checker
      function.
  """
-
  last_week = datetime.now() - timedelta(weeks=1)
  results = []
  existing_ids = []

-  def get_search_results(search_clause, with_count):
+  def get_search_results(search_clause, with_count=False):
    if len(results) >= limit:
      return

-    selected = [Repository, Namespace]
+    select_items = [Repository, Namespace]
    if with_count:
-      selected.append(fn.Count(LogEntry.id).alias('count'))
+      select_items.append(fn.Sum(RepositoryActionCount.count).alias('count'))

-    query = (Repository.select(*selected)
+    query = (Repository.select(*select_items)
                .join(Namespace, JOIN_LEFT_OUTER, on=(Namespace.id == Repository.namespace_user))
                .switch(Repository)
                .where(search_clause)
@ -1021,9 +1020,10 @@ def get_sorted_matching_repositories(prefix, only_public, checker, limit=10):
      query = query.where(~(Repository.id << existing_ids))

    if with_count:
-      query = (query.join(LogEntry, JOIN_LEFT_OUTER)
-                    .where(LogEntry.datetime >= last_week)
-                    .order_by(fn.Count(LogEntry.id).desc()))
+      query = (query.switch(Repository)
+                    .join(RepositoryActionCount)
+                    .where(RepositoryActionCount.date >= last_week)
+                    .order_by(fn.Sum(RepositoryActionCount.count).desc()))

    for result in query:
      if len(results) >= limit:
@ -1042,13 +1042,13 @@ def get_sorted_matching_repositories(prefix, only_public, checker, limit=10):
      existing_ids.append(result.id)

  # For performance reasons, we conduct the repo name and repo namespace searches on their
-  # own, and with and without counts on their own. This also affords us the ability to give
-  # higher precedence to repository names matching over namespaces, which is semantically correct.
-  get_search_results((Repository.name ** (prefix + '%')), with_count=True)
-  get_search_results((Repository.name ** (prefix + '%')), with_count=False)
+  # own. This also affords us the ability to give higher precedence to repository names matching
+  # over namespaces, which is semantically correct.
+  get_search_results(Repository.name ** (prefix + '%'), with_count=True)
+  get_search_results(Repository.name ** (prefix + '%'), with_count=False)

-  get_search_results((Namespace.username ** (prefix + '%')), with_count=True)
-  get_search_results((Namespace.username ** (prefix + '%')), with_count=False)
+  get_search_results(Namespace.username ** (prefix + '%'), with_count=True)
+  get_search_results(Namespace.username ** (prefix + '%'), with_count=False)

  return results

--- a/data/model/sqlalchemybridge.py
+++ b/data/model/sqlalchemybridge.py
@ -1,7 +1,7 @@
 from sqlalchemy import (Table, MetaData, Column, ForeignKey, Integer, String, Boolean, Text,
-                        DateTime, BigInteger, Index)
+                        DateTime, Date, BigInteger, Index)
 from peewee import (PrimaryKeyField, CharField, BooleanField, DateTimeField, TextField,
-                    ForeignKeyField, BigIntegerField, IntegerField)
+                    ForeignKeyField, BigIntegerField, IntegerField, DateField)


 OPTIONS_TO_COPY = [
@ -42,6 +42,8 @@ def gen_sqlalchemy_metadata(peewee_model_list):
        alchemy_type = Boolean
      elif isinstance(field, DateTimeField):
        alchemy_type = DateTime
+      elif isinstance(field, DateField):
+        alchemy_type = Date
      elif isinstance(field, TextField):
        alchemy_type = Text
      elif isinstance(field, ForeignKeyField):