Move the sorting of repositories by pull count into the main matching query, which makes the ordering more accurate and the search faster

This commit is contained in:
Joseph Schorr 2015-04-09 14:41:59 -04:00
parent 396cba64e6
commit 0be0aed17d
2 changed files with 29 additions and 19 deletions

View file

@ -688,10 +688,18 @@ def get_matching_user_namespaces(namespace_prefix, username, limit=10):
.switch(Repository)
.join(RepositoryPermission, JOIN_LEFT_OUTER)
.where(Namespace.username ** (namespace_prefix + '%'))
.group_by(Repository.namespace_user, Repository)
.limit(limit))
.group_by(Repository.namespace_user, Repository))
return [r.namespace_user for r in _filter_to_repos_for_user(query, username)]
count = 0
namespaces = {}
for repo in _filter_to_repos_for_user(query, username):
if not repo.namespace_user.username in namespaces:
namespaces[repo.namespace_user.username] = repo.namespace_user
count = count + 1
if count >= limit:
break
return namespaces.values()
def get_matching_user_teams(team_prefix, user, limit=10):
query = (Team.select()
@ -983,7 +991,8 @@ def _get_public_repo_visibility():
return _public_repo_visibility_cache
def get_matching_repositories(repo_term, username=None, limit=10, include_public=True):
def get_matching_repositories(repo_term, username=None, limit=10, include_public=True,
pull_count_sort=False):
namespace_term = repo_term
name_term = repo_term
@ -1001,21 +1010,23 @@ def get_matching_repositories(repo_term, username=None, limit=10, include_public
search_clauses = (Repository.name ** ('%' + name_term + '%') &
Namespace.username ** ('%' + namespace_term + '%'))
return visible.where(search_clauses).limit(limit)
query = visible.where(search_clauses).limit(limit)
if pull_count_sort:
repo_pull = LogEntryKind.get(name = 'pull_repo')
last_month = datetime.now() - timedelta(weeks=4)
def get_repository_pull_counts(repositories):
repo_pull = LogEntryKind.get(name = 'pull_repo')
if not repositories:
return []
query = (query.switch(Repository)
.join(LogEntry, JOIN_LEFT_OUTER)
.where(((LogEntry.kind == repo_pull) & (LogEntry.datetime >= last_month)) |
(LogEntry.id >> None))
.group_by(Repository, Namespace, Visibility)
.order_by(fn.Count(LogEntry.id).desc())
.select(Repository, Namespace, Visibility,
fn.Count(LogEntry.id).alias('count')))
return query
last_month = datetime.now() - timedelta(weeks=4)
return (Repository.select(Repository.id, fn.Count(LogEntry.id))
.where(Repository.id << [r.id for r in repositories])
.join(LogEntry, JOIN_LEFT_OUTER)
.where(LogEntry.kind == repo_pull, LogEntry.datetime >= last_month)
.group_by(Repository.id, LogEntry.id)
.tuples())
def change_password(user, new_password):
if not validate_password(new_password):

View file

@ -205,11 +205,10 @@ def conduct_admined_team_search(username, query, encountered_teams, results):
def conduct_repo_search(username, query, results):
""" Finds matching repositories. """
matching_repos = list(model.get_matching_repositories(query, username, limit=5))
matching_repo_counts = {t[0]: t[1] for t in model.get_repository_pull_counts(matching_repos)}
matching_repos = model.get_matching_repositories(query, username, limit=5, pull_count_sort=True)
for repo in matching_repos:
repo_score = math.log(matching_repo_counts.get(repo.id, 1), 10) or 1
repo_score = math.log(repo.count or 1, 10) or 1
# If the repository is under the user's namespace, give it 50% more weight.
namespace = repo.namespace_user.username