Add a RepositoryActionCount table so we can use it (instead of LogEntry) when scoring repo search results

This commit is contained in:
Joseph Schorr 2015-04-13 13:31:07 -04:00
parent 703f48f194
commit 3f1e8f3c27
8 changed files with 137 additions and 19 deletions

View file

@ -18,7 +18,7 @@ from data.database import (User, Repository, Image, AccessToken, Role, Repositor
DerivedImageStorage, ImageStorageTransformation, random_string_generator,
db, BUILD_PHASE, QuayUserField, ImageStorageSignature, QueueItem,
ImageStorageSignatureKind, validate_database_url, db_for_update,
AccessTokenKind, Star, get_epoch_timestamp)
AccessTokenKind, Star, get_epoch_timestamp, RepositoryActionCount)
from peewee import JOIN_LEFT_OUTER, fn
from util.validation import (validate_username, validate_email, validate_password,
INVALID_PASSWORD_MESSAGE)
@ -995,20 +995,19 @@ def get_sorted_matching_repositories(prefix, only_public, checker, limit=10):
""" Returns repositories matching the given prefix string and passing the given checker
function.
"""
last_week = datetime.now() - timedelta(weeks=1)
results = []
existing_ids = []
def get_search_results(search_clause, with_count):
def get_search_results(search_clause, with_count=False):
if len(results) >= limit:
return
selected = [Repository, Namespace]
select_items = [Repository, Namespace]
if with_count:
selected.append(fn.Count(LogEntry.id).alias('count'))
select_items.append(fn.Sum(RepositoryActionCount.count).alias('count'))
query = (Repository.select(*selected)
query = (Repository.select(*select_items)
.join(Namespace, JOIN_LEFT_OUTER, on=(Namespace.id == Repository.namespace_user))
.switch(Repository)
.where(search_clause)
@ -1021,9 +1020,10 @@ def get_sorted_matching_repositories(prefix, only_public, checker, limit=10):
query = query.where(~(Repository.id << existing_ids))
if with_count:
query = (query.join(LogEntry, JOIN_LEFT_OUTER)
.where(LogEntry.datetime >= last_week)
.order_by(fn.Count(LogEntry.id).desc()))
query = (query.switch(Repository)
.join(RepositoryActionCount)
.where(RepositoryActionCount.date >= last_week)
.order_by(fn.Sum(RepositoryActionCount.count).desc()))
for result in query:
if len(results) >= limit:
@ -1042,13 +1042,13 @@ def get_sorted_matching_repositories(prefix, only_public, checker, limit=10):
existing_ids.append(result.id)
# For performance reasons, we conduct the repo name and repo namespace searches on their
# own, and with and without counts on their own. This also affords us the ability to give
# higher precedence to repository names matching over namespaces, which is semantically correct.
get_search_results((Repository.name ** (prefix + '%')), with_count=True)
get_search_results((Repository.name ** (prefix + '%')), with_count=False)
# own. This also affords us the ability to give higher precedence to repository names matching
# over namespaces, which is semantically correct.
get_search_results(Repository.name ** (prefix + '%'), with_count=True)
get_search_results(Repository.name ** (prefix + '%'), with_count=False)
get_search_results((Namespace.username ** (prefix + '%')), with_count=True)
get_search_results((Namespace.username ** (prefix + '%')), with_count=False)
get_search_results(Namespace.username ** (prefix + '%'), with_count=True)
get_search_results(Namespace.username ** (prefix + '%'), with_count=False)
return results

View file

@ -1,7 +1,7 @@
from sqlalchemy import (Table, MetaData, Column, ForeignKey, Integer, String, Boolean, Text,
DateTime, BigInteger, Index)
DateTime, Date, BigInteger, Index)
from peewee import (PrimaryKeyField, CharField, BooleanField, DateTimeField, TextField,
ForeignKeyField, BigIntegerField, IntegerField)
ForeignKeyField, BigIntegerField, IntegerField, DateField)
OPTIONS_TO_COPY = [
@ -42,6 +42,8 @@ def gen_sqlalchemy_metadata(peewee_model_list):
alchemy_type = Boolean
elif isinstance(field, DateTimeField):
alchemy_type = DateTime
elif isinstance(field, DateField):
alchemy_type = Date
elif isinstance(field, TextField):
alchemy_type = Text
elif isinstance(field, ForeignKeyField):