Merge pull request #2392 from coreos-inc/search-optimization

Optimize repository search by changing our lookup strategy
This commit is contained in:
josephschorr 2017-03-10 15:44:26 -05:00 committed by GitHub
commit 432b2d3fe8
9 changed files with 123 additions and 123 deletions

View file

@ -18,6 +18,12 @@ from util.names import parse_robot_username
import anunidecode # Don't listen to pylint's lies. This import is required.
import math
ENTITY_SEARCH_SCORE = 1
TEAM_SEARCH_SCORE = 2
REPOSITORY_SEARCH_SCORE = 4
@resource('/v1/entities/link/<username>')
@internal_only
class LinkExternalEntity(ApiResource):
@ -179,7 +185,7 @@ def search_entity_view(username, entity, get_short_name=None):
'kind': kind,
'avatar': avatar_data,
'name': entity.username,
'score': 1,
'score': ENTITY_SEARCH_SCORE,
'href': href
}
@ -203,7 +209,7 @@ def conduct_team_search(username, query, encountered_teams, results):
'name': team.name,
'organization': search_entity_view(username, team.organization),
'avatar': avatar.get_data_for_team(team),
'score': 2,
'score': TEAM_SEARCH_SCORE,
'href': '/organization/' + team.organization.username + '/teams/' + team.name
})
@ -222,38 +228,23 @@ def conduct_admined_team_search(username, query, encountered_teams, results):
'name': team.name,
'organization': search_entity_view(username, team.organization),
'avatar': avatar.get_data_for_team(team),
'score': 2,
'score': TEAM_SEARCH_SCORE,
'href': '/organization/' + team.organization.username + '/teams/' + team.name
})
def conduct_repo_search(username, query, results):
""" Finds matching repositories. """
def can_read(repo):
if repo.is_public:
return True
return ReadRepositoryPermission(repo.namespace_user.username, repo.name).can()
only_public = username is None
matching_repos = model.repository.get_sorted_matching_repositories(query, only_public, can_read,
limit=5)
matching_repos = model.repository.get_filtered_matching_repositories(query, username, limit=5)
for repo in matching_repos:
repo_score = math.log(repo.count or 1, 10) or 1
# If the repository is under the user's namespace, give it 20% more weight.
namespace = repo.namespace_user.username
if OrganizationMemberPermission(namespace).can() or namespace == username:
repo_score = repo_score * 1.2
results.append({
'kind': 'repository',
'namespace': search_entity_view(username, repo.namespace_user),
'name': repo.name,
'description': repo.description,
'is_public': repo.is_public,
'score': repo_score,
'is_public': model.repository.is_repository_public(repo),
'score': REPOSITORY_SEARCH_SCORE,
'href': '/repository/' + repo.namespace_user.username + '/' + repo.name
})

View file

@ -312,33 +312,19 @@ def get_search():
def _conduct_repo_search(username, query, limit=25, page=1):
""" Finds matching repositories. """
only_public = username is None
def can_read(repo):
if repo.is_public:
return True
if only_public:
return False
return ReadRepositoryPermission(repo.namespace_user.username, repo.name).can()
# Note: We put a max 5 page limit here. The Docker CLI doesn't seem to use the
# pagination and most customers hitting the API should be using V2 catalog, so this
# is a safety net for our slow search below, since we have to use the slow approach
# of finding *all* the results, and then slicing in-memory, because this old API requires
# the *full* page count in the returned results.
_MAX_PAGE_COUNT = 5
page = min(page, _MAX_PAGE_COUNT)
# Note that we put a maximum limit of five pages here, because this API should only really ever
# be used by the Docker CLI, and it doesn't even paginate.
page = min(page, 5)
offset = (page - 1) * limit
if query:
matching_repos = model.get_sorted_matching_repositories(query, only_public, can_read,
limit=limit*_MAX_PAGE_COUNT)
matching_repos = model.get_sorted_matching_repositories(query, username, limit=limit+1,
offset=offset)
else:
matching_repos = []
results = []
for repo in matching_repos[(page - 1) * _MAX_PAGE_COUNT:limit]:
for repo in matching_repos[0:limit]:
results.append({
'name': repo.namespace_name + '/' + repo.name,
'description': repo.description,
@ -350,7 +336,7 @@ def _conduct_repo_search(username, query, limit=25, page=1):
return {
'query': query,
'num_results': len(results),
'num_pages': (len(matching_repos) / limit) + 1,
'num_pages': page + 1 if len(matching_repos) > limit else page,
'page': page,
'page_size': limit,
'results': results,