Add a RepositoryActionCount table so we can use it (instead of LogEntry) when scoring repo search results
This commit is contained in:
parent
703f48f194
commit
3f1e8f3c27
8 changed files with 137 additions and 19 deletions
|
@ -18,7 +18,7 @@ from data.database import (User, Repository, Image, AccessToken, Role, Repositor
|
|||
DerivedImageStorage, ImageStorageTransformation, random_string_generator,
|
||||
db, BUILD_PHASE, QuayUserField, ImageStorageSignature, QueueItem,
|
||||
ImageStorageSignatureKind, validate_database_url, db_for_update,
|
||||
AccessTokenKind, Star, get_epoch_timestamp)
|
||||
AccessTokenKind, Star, get_epoch_timestamp, RepositoryActionCount)
|
||||
from peewee import JOIN_LEFT_OUTER, fn
|
||||
from util.validation import (validate_username, validate_email, validate_password,
|
||||
INVALID_PASSWORD_MESSAGE)
|
||||
|
@ -995,20 +995,19 @@ def get_sorted_matching_repositories(prefix, only_public, checker, limit=10):
|
|||
""" Returns repositories matching the given prefix string and passing the given checker
|
||||
function.
|
||||
"""
|
||||
|
||||
last_week = datetime.now() - timedelta(weeks=1)
|
||||
results = []
|
||||
existing_ids = []
|
||||
|
||||
def get_search_results(search_clause, with_count):
|
||||
def get_search_results(search_clause, with_count=False):
|
||||
if len(results) >= limit:
|
||||
return
|
||||
|
||||
selected = [Repository, Namespace]
|
||||
select_items = [Repository, Namespace]
|
||||
if with_count:
|
||||
selected.append(fn.Count(LogEntry.id).alias('count'))
|
||||
select_items.append(fn.Sum(RepositoryActionCount.count).alias('count'))
|
||||
|
||||
query = (Repository.select(*selected)
|
||||
query = (Repository.select(*select_items)
|
||||
.join(Namespace, JOIN_LEFT_OUTER, on=(Namespace.id == Repository.namespace_user))
|
||||
.switch(Repository)
|
||||
.where(search_clause)
|
||||
|
@ -1021,9 +1020,10 @@ def get_sorted_matching_repositories(prefix, only_public, checker, limit=10):
|
|||
query = query.where(~(Repository.id << existing_ids))
|
||||
|
||||
if with_count:
|
||||
query = (query.join(LogEntry, JOIN_LEFT_OUTER)
|
||||
.where(LogEntry.datetime >= last_week)
|
||||
.order_by(fn.Count(LogEntry.id).desc()))
|
||||
query = (query.switch(Repository)
|
||||
.join(RepositoryActionCount)
|
||||
.where(RepositoryActionCount.date >= last_week)
|
||||
.order_by(fn.Sum(RepositoryActionCount.count).desc()))
|
||||
|
||||
for result in query:
|
||||
if len(results) >= limit:
|
||||
|
@ -1042,13 +1042,13 @@ def get_sorted_matching_repositories(prefix, only_public, checker, limit=10):
|
|||
existing_ids.append(result.id)
|
||||
|
||||
# For performance reasons, we conduct the repo name and repo namespace searches on their
|
||||
# own, and with and without counts on their own. This also affords us the ability to give
|
||||
# higher precedence to repository names matching over namespaces, which is semantically correct.
|
||||
get_search_results((Repository.name ** (prefix + '%')), with_count=True)
|
||||
get_search_results((Repository.name ** (prefix + '%')), with_count=False)
|
||||
# own. This also affords us the ability to give higher precedence to repository names matching
|
||||
# over namespaces, which is semantically correct.
|
||||
get_search_results(Repository.name ** (prefix + '%'), with_count=True)
|
||||
get_search_results(Repository.name ** (prefix + '%'), with_count=False)
|
||||
|
||||
get_search_results((Namespace.username ** (prefix + '%')), with_count=True)
|
||||
get_search_results((Namespace.username ** (prefix + '%')), with_count=False)
|
||||
get_search_results(Namespace.username ** (prefix + '%'), with_count=True)
|
||||
get_search_results(Namespace.username ** (prefix + '%'), with_count=False)
|
||||
|
||||
return results
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
from sqlalchemy import (Table, MetaData, Column, ForeignKey, Integer, String, Boolean, Text,
|
||||
DateTime, BigInteger, Index)
|
||||
DateTime, Date, BigInteger, Index)
|
||||
from peewee import (PrimaryKeyField, CharField, BooleanField, DateTimeField, TextField,
|
||||
ForeignKeyField, BigIntegerField, IntegerField)
|
||||
ForeignKeyField, BigIntegerField, IntegerField, DateField)
|
||||
|
||||
|
||||
OPTIONS_TO_COPY = [
|
||||
|
@ -42,6 +42,8 @@ def gen_sqlalchemy_metadata(peewee_model_list):
|
|||
alchemy_type = Boolean
|
||||
elif isinstance(field, DateTimeField):
|
||||
alchemy_type = DateTime
|
||||
elif isinstance(field, DateField):
|
||||
alchemy_type = Date
|
||||
elif isinstance(field, TextField):
|
||||
alchemy_type = Text
|
||||
elif isinstance(field, ForeignKeyField):
|
||||
|
|
Reference in a new issue