Merge pull request #2392 from coreos-inc/search-optimization

Optimize repository search by changing our lookup strategy
2017-03-10 15:44:26 -05:00 · 2017-03-10 15:44:26 -05:00 · 432b2d3fe8
commit 432b2d3fe8
parent c724af12cd d42ec4e585
9 changed files with 123 additions and 123 deletions
--- a/endpoints/api/search.py
+++ b/endpoints/api/search.py
@@ -18,6 +18,12 @@ from util.names import parse_robot_username
 import anunidecode # Don't listen to pylint's lies. This import is required.
 import math

+
+ENTITY_SEARCH_SCORE = 1
+TEAM_SEARCH_SCORE = 2
+REPOSITORY_SEARCH_SCORE = 4
+
+
 @resource('/v1/entities/link/<username>')
 @internal_only
 class LinkExternalEntity(ApiResource):
@@ -179,7 +185,7 @@ def search_entity_view(username, entity, get_short_name=None):
    'kind': kind,
    'avatar': avatar_data,
    'name': entity.username,
-    'score': 1,
+    'score': ENTITY_SEARCH_SCORE,
    'href': href
  }

@@ -203,7 +209,7 @@ def conduct_team_search(username, query, encountered_teams, results):
      'name': team.name,
      'organization': search_entity_view(username, team.organization),
      'avatar': avatar.get_data_for_team(team),
-      'score': 2,
+      'score': TEAM_SEARCH_SCORE,
      'href': '/organization/' + team.organization.username + '/teams/' + team.name
    })

@@ -222,38 +228,23 @@ def conduct_admined_team_search(username, query, encountered_teams, results):
      'name': team.name,
      'organization': search_entity_view(username, team.organization),
      'avatar': avatar.get_data_for_team(team),
-      'score': 2,
+      'score': TEAM_SEARCH_SCORE,
      'href': '/organization/' + team.organization.username + '/teams/' + team.name
    })


 def conduct_repo_search(username, query, results):
  """ Finds matching repositories. """
-  def can_read(repo):
-    if repo.is_public:
-      return True
-
-    return ReadRepositoryPermission(repo.namespace_user.username, repo.name).can()
-
-  only_public = username is None
-  matching_repos = model.repository.get_sorted_matching_repositories(query, only_public, can_read,
-                                                                     limit=5)
+  matching_repos = model.repository.get_filtered_matching_repositories(query, username, limit=5)

  for repo in matching_repos:
-    repo_score = math.log(repo.count or 1, 10) or 1
-
-    # If the repository is under the user's namespace, give it 20% more weight.
-    namespace = repo.namespace_user.username
-    if OrganizationMemberPermission(namespace).can() or namespace == username:
-      repo_score = repo_score * 1.2
-
    results.append({
      'kind': 'repository',
      'namespace': search_entity_view(username, repo.namespace_user),
      'name': repo.name,
      'description': repo.description,
-      'is_public': repo.is_public,
-      'score': repo_score,
+      'is_public': model.repository.is_repository_public(repo),
+      'score': REPOSITORY_SEARCH_SCORE,
      'href': '/repository/' + repo.namespace_user.username + '/' + repo.name
    })

--- a/endpoints/v1/index.py
+++ b/endpoints/v1/index.py
@@ -312,33 +312,19 @@ def get_search():

 def _conduct_repo_search(username, query, limit=25, page=1):
  """ Finds matching repositories. """
-  only_public = username is None
-
-  def can_read(repo):
-    if repo.is_public:
-      return True
-
-    if only_public:
-      return False
-
-    return ReadRepositoryPermission(repo.namespace_user.username, repo.name).can()
-
-  # Note: We put a max 5 page limit here. The Docker CLI doesn't seem to use the
-  # pagination and most customers hitting the API should be using V2 catalog, so this
-  # is a safety net for our slow search below, since we have to use the slow approach
-  # of finding *all* the results, and then slicing in-memory, because this old API requires
-  # the *full* page count in the returned results.
-  _MAX_PAGE_COUNT = 5
-  page = min(page, _MAX_PAGE_COUNT)
+  # Note that we put a maximum limit of five pages here, because this API should only really ever
+  # be used by the Docker CLI, and it doesn't even paginate.
+  page = min(page, 5)
+  offset = (page - 1) * limit

  if query:
-    matching_repos = model.get_sorted_matching_repositories(query, only_public, can_read,
-                                                            limit=limit*_MAX_PAGE_COUNT)
+    matching_repos = model.get_sorted_matching_repositories(query, username, limit=limit+1,
+                                                            offset=offset)
  else:
    matching_repos = []

  results = []
-  for repo in matching_repos[(page - 1) * _MAX_PAGE_COUNT:limit]:
+  for repo in matching_repos[0:limit]:
    results.append({
      'name': repo.namespace_name + '/' + repo.name,
      'description': repo.description,
@@ -350,7 +336,7 @@ def _conduct_repo_search(username, query, limit=25, page=1):
  return {
    'query': query,
    'num_results': len(results),
-    'num_pages': (len(matching_repos) / limit) + 1,
+    'num_pages': page + 1 if len(matching_repos) > limit else page,
    'page': page,
    'page_size': limit,
    'results': results,