Move the repo sorting by pull count into the main matching query, making the sort more accurate and the search faster
This commit is contained in:
parent
396cba64e6
commit
0be0aed17d
2 changed files with 29 additions and 19 deletions
|
@ -688,10 +688,18 @@ def get_matching_user_namespaces(namespace_prefix, username, limit=10):
|
|||
.switch(Repository)
|
||||
.join(RepositoryPermission, JOIN_LEFT_OUTER)
|
||||
.where(Namespace.username ** (namespace_prefix + '%'))
|
||||
.group_by(Repository.namespace_user, Repository)
|
||||
.limit(limit))
|
||||
.group_by(Repository.namespace_user, Repository))
|
||||
|
||||
return [r.namespace_user for r in _filter_to_repos_for_user(query, username)]
|
||||
count = 0
|
||||
namespaces = {}
|
||||
for repo in _filter_to_repos_for_user(query, username):
|
||||
if not repo.namespace_user.username in namespaces:
|
||||
namespaces[repo.namespace_user.username] = repo.namespace_user
|
||||
count = count + 1
|
||||
if count >= limit:
|
||||
break
|
||||
|
||||
return namespaces.values()
|
||||
|
||||
def get_matching_user_teams(team_prefix, user, limit=10):
|
||||
query = (Team.select()
|
||||
|
@ -983,7 +991,8 @@ def _get_public_repo_visibility():
|
|||
return _public_repo_visibility_cache
|
||||
|
||||
|
||||
def get_matching_repositories(repo_term, username=None, limit=10, include_public=True):
|
||||
def get_matching_repositories(repo_term, username=None, limit=10, include_public=True,
|
||||
pull_count_sort=False):
|
||||
namespace_term = repo_term
|
||||
name_term = repo_term
|
||||
|
||||
|
@ -1001,21 +1010,23 @@ def get_matching_repositories(repo_term, username=None, limit=10, include_public
|
|||
search_clauses = (Repository.name ** ('%' + name_term + '%') &
|
||||
Namespace.username ** ('%' + namespace_term + '%'))
|
||||
|
||||
return visible.where(search_clauses).limit(limit)
|
||||
query = visible.where(search_clauses).limit(limit)
|
||||
|
||||
|
||||
def get_repository_pull_counts(repositories):
|
||||
if pull_count_sort:
|
||||
repo_pull = LogEntryKind.get(name = 'pull_repo')
|
||||
if not repositories:
|
||||
return []
|
||||
|
||||
last_month = datetime.now() - timedelta(weeks=4)
|
||||
return (Repository.select(Repository.id, fn.Count(LogEntry.id))
|
||||
.where(Repository.id << [r.id for r in repositories])
|
||||
|
||||
query = (query.switch(Repository)
|
||||
.join(LogEntry, JOIN_LEFT_OUTER)
|
||||
.where(LogEntry.kind == repo_pull, LogEntry.datetime >= last_month)
|
||||
.group_by(Repository.id, LogEntry.id)
|
||||
.tuples())
|
||||
.where(((LogEntry.kind == repo_pull) & (LogEntry.datetime >= last_month)) |
|
||||
(LogEntry.id >> None))
|
||||
.group_by(Repository, Namespace, Visibility)
|
||||
.order_by(fn.Count(LogEntry.id).desc())
|
||||
.select(Repository, Namespace, Visibility,
|
||||
fn.Count(LogEntry.id).alias('count')))
|
||||
|
||||
return query
|
||||
|
||||
|
||||
def change_password(user, new_password):
|
||||
if not validate_password(new_password):
|
||||
|
|
|
@ -205,11 +205,10 @@ def conduct_admined_team_search(username, query, encountered_teams, results):
|
|||
|
||||
def conduct_repo_search(username, query, results):
|
||||
""" Finds matching repositories. """
|
||||
matching_repos = list(model.get_matching_repositories(query, username, limit=5))
|
||||
matching_repo_counts = {t[0]: t[1] for t in model.get_repository_pull_counts(matching_repos)}
|
||||
matching_repos = model.get_matching_repositories(query, username, limit=5, pull_count_sort=True)
|
||||
|
||||
for repo in matching_repos:
|
||||
repo_score = math.log(matching_repo_counts.get(repo.id, 1), 10) or 1
|
||||
repo_score = math.log(repo.count or 1, 10) or 1
|
||||
|
||||
# If the repository is under the user's namespace, give it 50% more weight.
|
||||
namespace = repo.namespace_user.username
|
||||
|
|
Reference in a new issue