Fix bug around search pagination with non-filtered searches

Also further optimizes the queries
This commit is contained in:
Joseph Schorr 2017-11-30 15:50:21 -05:00
parent dfd736c4c5
commit eea026be52
2 changed files with 45 additions and 12 deletions

View file

@ -428,6 +428,10 @@ def get_app_search(lookup, search_fields=None, username=None, limit=50):
offset=0, limit=limit) offset=0, limit=limit)
def _iterator_wrap(query):
for row in query:
yield row
def get_filtered_matching_repositories(lookup_value, filter_username=None, repo_kind='image', def get_filtered_matching_repositories(lookup_value, filter_username=None, repo_kind='image',
offset=0, limit=25, search_fields=None): offset=0, limit=25, search_fields=None):
""" Returns an iterator of all repositories matching the given lookup value, with optional """ Returns an iterator of all repositories matching the given lookup value, with optional
@ -440,14 +444,15 @@ def get_filtered_matching_repositories(lookup_value, filter_username=None, repo_
# Build the unfiltered search query. # Build the unfiltered search query.
unfiltered_query = _get_sorted_matching_repositories(lookup_value, repo_kind=repo_kind, unfiltered_query = _get_sorted_matching_repositories(lookup_value, repo_kind=repo_kind,
search_fields=search_fields, search_fields=search_fields,
include_private=filter_username is not None) include_private=filter_username is not None,
ids_only=filter_username is not None)
# Add a filter to the iterator, if necessary. # Add a filter to the iterator, if necessary.
if filter_username is not None: if filter_username is not None:
iterator = _filter_repositories_visible_to_username(unfiltered_query, filter_username, limit, iterator = _filter_repositories_visible_to_username(unfiltered_query, filter_username, limit,
repo_kind) repo_kind)
else: else:
iterator = unfiltered_query iterator = _iterator_wrap(unfiltered_query)
if offset > 0: if offset > 0:
take(offset, iterator) take(offset, iterator)
@ -500,18 +505,18 @@ def _filter_repositories_visible_to_username(unfiltered_query, filter_username,
def _get_sorted_matching_repositories(lookup_value, repo_kind='image', include_private=False, def _get_sorted_matching_repositories(lookup_value, repo_kind='image', include_private=False,
search_fields=None): search_fields=None, ids_only=False):
""" Returns a query of repositories matching the given lookup string, with optional inclusion of """ Returns a query of repositories matching the given lookup string, with optional inclusion of
private repositories. Note that this method does *not* filter results based on visibility private repositories. Note that this method does *not* filter results based on visibility
to users. to users.
""" """
select_fields = [Repository.id] if ids_only else [Repository, Namespace]
if not lookup_value: if not lookup_value:
# This is a generic listing of repositories. Simply return the sorted repositories based # This is a generic listing of repositories. Simply return the sorted repositories based
# on RepositorySearchScore. # on RepositorySearchScore.
query = (Repository query = (Repository
.select(Repository, Namespace, RepositorySearchScore.score) .select(*select_fields)
.join(Namespace, on=(Namespace.id == Repository.namespace_user))
.switch(Repository)
.join(RepositorySearchScore) .join(RepositorySearchScore)
.order_by(RepositorySearchScore.score.desc())) .order_by(RepositorySearchScore.score.desc()))
else: else:
@ -529,12 +534,11 @@ def _get_sorted_matching_repositories(lookup_value, repo_kind='image', include_p
cases = [(Repository.name.match(lookup_value), 100 * RepositorySearchScore.score),] cases = [(Repository.name.match(lookup_value), 100 * RepositorySearchScore.score),]
computed_score = case(None, cases, RepositorySearchScore.score).alias('score') computed_score = case(None, cases, RepositorySearchScore.score).alias('score')
query = (Repository.select(Repository, Namespace, computed_score) select_fields.append(computed_score)
.join(Namespace, on=(Namespace.id == Repository.namespace_user)).where(clause) query = (Repository.select(*select_fields)
.group_by(Repository.id, Namespace.id)) .join(RepositorySearchScore)
.where(clause)
query = (query.switch(Repository).join(RepositorySearchScore) .order_by(SQL('score').desc()))
.group_by(Repository, Namespace, RepositorySearchScore).order_by(SQL('score').desc()))
if repo_kind is not None: if repo_kind is not None:
query = query.where(Repository.kind == Repository.kind.get_id(repo_kind)) query = query.where(Repository.kind == Repository.kind.get_id(repo_kind))
@ -542,6 +546,11 @@ def _get_sorted_matching_repositories(lookup_value, repo_kind='image', include_p
if not include_private: if not include_private:
query = query.where(Repository.visibility == _basequery.get_public_repo_visibility()) query = query.where(Repository.visibility == _basequery.get_public_repo_visibility())
if not ids_only:
query = (query
.switch(Repository)
.join(Namespace, on=(Namespace.id == Repository.namespace_user)))
return query return query

View file

@ -3,6 +3,7 @@ import pytest
from peewee import IntegrityError from peewee import IntegrityError
from data.model.repository import create_repository, purge_repository, is_empty from data.model.repository import create_repository, purge_repository, is_empty
from data.model.repository import get_filtered_matching_repositories
from test.fixtures import * from test.fixtures import *
def test_duplicate_repository_different_kinds(initialized_db): def test_duplicate_repository_different_kinds(initialized_db):
@ -19,3 +20,26 @@ def test_is_empty(initialized_db):
assert is_empty('devtable', 'somenewrepo') assert is_empty('devtable', 'somenewrepo')
assert not is_empty('devtable', 'simple') assert not is_empty('devtable', 'simple')
@pytest.mark.skipif(os.environ.get('TEST_DATABASE_URI', '').find('mysql') >= 0,
reason='MySQL requires specialized indexing of newly created repos')
@pytest.mark.parametrize('query', [
(''),
('e'),
])
@pytest.mark.parametrize('authed_username', [
(None),
('devtable'),
])
def test_search_pagination(query, authed_username, initialized_db):
# Create some public repos.
repo1 = create_repository('devtable', 'somenewrepo', None, repo_kind='image', visibility='public')
repo2 = create_repository('devtable', 'somenewrepo2', None, repo_kind='image', visibility='public')
repo3 = create_repository('devtable', 'somenewrepo3', None, repo_kind='image', visibility='public')
repositories = get_filtered_matching_repositories(query, filter_username=authed_username)
assert len(repositories) > 3
next_repos = get_filtered_matching_repositories(query, filter_username=authed_username, offset=1)
assert repositories[0].id != next_repos[0].id
assert repositories[1].id == next_repos[0].id