Merge pull request #2392 from coreos-inc/search-optimization
Optimize repository search by changing our lookup strategy
This commit is contained in:
commit
432b2d3fe8
9 changed files with 123 additions and 123 deletions
|
@ -18,6 +18,12 @@ from util.names import parse_robot_username
|
|||
import anunidecode # Don't listen to pylint's lies. This import is required.
|
||||
import math
|
||||
|
||||
|
||||
ENTITY_SEARCH_SCORE = 1
|
||||
TEAM_SEARCH_SCORE = 2
|
||||
REPOSITORY_SEARCH_SCORE = 4
|
||||
|
||||
|
||||
@resource('/v1/entities/link/<username>')
|
||||
@internal_only
|
||||
class LinkExternalEntity(ApiResource):
|
||||
|
@ -179,7 +185,7 @@ def search_entity_view(username, entity, get_short_name=None):
|
|||
'kind': kind,
|
||||
'avatar': avatar_data,
|
||||
'name': entity.username,
|
||||
'score': 1,
|
||||
'score': ENTITY_SEARCH_SCORE,
|
||||
'href': href
|
||||
}
|
||||
|
||||
|
@ -203,7 +209,7 @@ def conduct_team_search(username, query, encountered_teams, results):
|
|||
'name': team.name,
|
||||
'organization': search_entity_view(username, team.organization),
|
||||
'avatar': avatar.get_data_for_team(team),
|
||||
'score': 2,
|
||||
'score': TEAM_SEARCH_SCORE,
|
||||
'href': '/organization/' + team.organization.username + '/teams/' + team.name
|
||||
})
|
||||
|
||||
|
@ -222,38 +228,23 @@ def conduct_admined_team_search(username, query, encountered_teams, results):
|
|||
'name': team.name,
|
||||
'organization': search_entity_view(username, team.organization),
|
||||
'avatar': avatar.get_data_for_team(team),
|
||||
'score': 2,
|
||||
'score': TEAM_SEARCH_SCORE,
|
||||
'href': '/organization/' + team.organization.username + '/teams/' + team.name
|
||||
})
|
||||
|
||||
|
||||
def conduct_repo_search(username, query, results):
|
||||
""" Finds matching repositories. """
|
||||
def can_read(repo):
|
||||
if repo.is_public:
|
||||
return True
|
||||
|
||||
return ReadRepositoryPermission(repo.namespace_user.username, repo.name).can()
|
||||
|
||||
only_public = username is None
|
||||
matching_repos = model.repository.get_sorted_matching_repositories(query, only_public, can_read,
|
||||
limit=5)
|
||||
matching_repos = model.repository.get_filtered_matching_repositories(query, username, limit=5)
|
||||
|
||||
for repo in matching_repos:
|
||||
repo_score = math.log(repo.count or 1, 10) or 1
|
||||
|
||||
# If the repository is under the user's namespace, give it 20% more weight.
|
||||
namespace = repo.namespace_user.username
|
||||
if OrganizationMemberPermission(namespace).can() or namespace == username:
|
||||
repo_score = repo_score * 1.2
|
||||
|
||||
results.append({
|
||||
'kind': 'repository',
|
||||
'namespace': search_entity_view(username, repo.namespace_user),
|
||||
'name': repo.name,
|
||||
'description': repo.description,
|
||||
'is_public': repo.is_public,
|
||||
'score': repo_score,
|
||||
'is_public': model.repository.is_repository_public(repo),
|
||||
'score': REPOSITORY_SEARCH_SCORE,
|
||||
'href': '/repository/' + repo.namespace_user.username + '/' + repo.name
|
||||
})
|
||||
|
||||
|
|
|
@ -312,33 +312,19 @@ def get_search():
|
|||
|
||||
def _conduct_repo_search(username, query, limit=25, page=1):
|
||||
""" Finds matching repositories. """
|
||||
only_public = username is None
|
||||
|
||||
def can_read(repo):
|
||||
if repo.is_public:
|
||||
return True
|
||||
|
||||
if only_public:
|
||||
return False
|
||||
|
||||
return ReadRepositoryPermission(repo.namespace_user.username, repo.name).can()
|
||||
|
||||
# Note: We put a max 5 page limit here. The Docker CLI doesn't seem to use the
|
||||
# pagination and most customers hitting the API should be using V2 catalog, so this
|
||||
# is a safety net for our slow search below, since we have to use the slow approach
|
||||
# of finding *all* the results, and then slicing in-memory, because this old API requires
|
||||
# the *full* page count in the returned results.
|
||||
_MAX_PAGE_COUNT = 5
|
||||
page = min(page, _MAX_PAGE_COUNT)
|
||||
# Note that we put a maximum limit of five pages here, because this API should only really ever
|
||||
# be used by the Docker CLI, and it doesn't even paginate.
|
||||
page = min(page, 5)
|
||||
offset = (page - 1) * limit
|
||||
|
||||
if query:
|
||||
matching_repos = model.get_sorted_matching_repositories(query, only_public, can_read,
|
||||
limit=limit*_MAX_PAGE_COUNT)
|
||||
matching_repos = model.get_sorted_matching_repositories(query, username, limit=limit+1,
|
||||
offset=offset)
|
||||
else:
|
||||
matching_repos = []
|
||||
|
||||
results = []
|
||||
for repo in matching_repos[(page - 1) * _MAX_PAGE_COUNT:limit]:
|
||||
for repo in matching_repos[0:limit]:
|
||||
results.append({
|
||||
'name': repo.namespace_name + '/' + repo.name,
|
||||
'description': repo.description,
|
||||
|
@ -350,7 +336,7 @@ def _conduct_repo_search(username, query, limit=25, page=1):
|
|||
return {
|
||||
'query': query,
|
||||
'num_results': len(results),
|
||||
'num_pages': (len(matching_repos) / limit) + 1,
|
||||
'num_pages': page + 1 if len(matching_repos) > limit else page,
|
||||
'page': page,
|
||||
'page_size': limit,
|
||||
'results': results,
|
||||
|
|
Reference in a new issue