From eea026be529c1ecad369a64b8a4a441ae07b7ae9 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Thu, 30 Nov 2017 15:50:21 -0500 Subject: [PATCH] Fix bug around search pagination with non-filtered searches Also further optimizes the queries --- data/model/repository.py | 33 +++++++++++++++++++----------- data/model/test/test_repository.py | 24 ++++++++++++++++++++++ 2 files changed, 45 insertions(+), 12 deletions(-) diff --git a/data/model/repository.py b/data/model/repository.py index c952484c0..7c60c7dc7 100644 --- a/data/model/repository.py +++ b/data/model/repository.py @@ -428,6 +428,10 @@ def get_app_search(lookup, search_fields=None, username=None, limit=50): offset=0, limit=limit) +def _iterator_wrap(query): + for row in query: + yield row + def get_filtered_matching_repositories(lookup_value, filter_username=None, repo_kind='image', offset=0, limit=25, search_fields=None): """ Returns an iterator of all repositories matching the given lookup value, with optional @@ -440,14 +444,15 @@ def get_filtered_matching_repositories(lookup_value, filter_username=None, repo_ # Build the unfiltered search query. unfiltered_query = _get_sorted_matching_repositories(lookup_value, repo_kind=repo_kind, search_fields=search_fields, - include_private=filter_username is not None) + include_private=filter_username is not None, + ids_only=filter_username is not None) # Add a filter to the iterator, if necessary. if filter_username is not None: iterator = _filter_repositories_visible_to_username(unfiltered_query, filter_username, limit, repo_kind) else: - iterator = unfiltered_query + iterator = _iterator_wrap(unfiltered_query) if offset > 0: take(offset, iterator) @@ -500,18 +505,18 @@ def _filter_repositories_visible_to_username(unfiltered_query, filter_username, def _get_sorted_matching_repositories(lookup_value, repo_kind='image', include_private=False, - search_fields=None): + search_fields=None, ids_only=False): """ Returns a query of repositories matching the given lookup string, with optional inclusion of private repositories. Note that this method does *not* filter results based on visibility to users. """ + select_fields = [Repository.id] if ids_only else [Repository, Namespace] + if not lookup_value: # This is a generic listing of repositories. Simply return the sorted repositories based # on RepositorySearchScore. query = (Repository - .select(Repository, Namespace, RepositorySearchScore.score) - .join(Namespace, on=(Namespace.id == Repository.namespace_user)) - .switch(Repository) + .select(*select_fields) .join(RepositorySearchScore) .order_by(RepositorySearchScore.score.desc())) else: @@ -529,12 +534,11 @@ def _get_sorted_matching_repositories(lookup_value, repo_kind='image', include_p cases = [(Repository.name.match(lookup_value), 100 * RepositorySearchScore.score),] computed_score = case(None, cases, RepositorySearchScore.score).alias('score') - query = (Repository.select(Repository, Namespace, computed_score) - .join(Namespace, on=(Namespace.id == Repository.namespace_user)).where(clause) - .group_by(Repository.id, Namespace.id)) - - query = (query.switch(Repository).join(RepositorySearchScore) - .group_by(Repository, Namespace, RepositorySearchScore).order_by(SQL('score').desc())) + select_fields.append(computed_score) + query = (Repository.select(*select_fields) + .join(RepositorySearchScore) + .where(clause) + .order_by(SQL('score').desc())) if repo_kind is not None: query = query.where(Repository.kind == Repository.kind.get_id(repo_kind)) @@ -542,6 +546,11 @@ def _get_sorted_matching_repositories(lookup_value, repo_kind='image', include_p if not include_private: query = query.where(Repository.visibility == _basequery.get_public_repo_visibility()) + if not ids_only: + query = (query + .switch(Repository) + .join(Namespace, on=(Namespace.id == Repository.namespace_user))) + return query diff --git a/data/model/test/test_repository.py b/data/model/test/test_repository.py index b5c6f741b..c55d084c1 100644 --- a/data/model/test/test_repository.py +++ b/data/model/test/test_repository.py @@ -3,6 +3,7 @@ import pytest from peewee import IntegrityError from data.model.repository import create_repository, purge_repository, is_empty +from data.model.repository import get_filtered_matching_repositories from test.fixtures import * def test_duplicate_repository_different_kinds(initialized_db): @@ -19,3 +20,26 @@ def test_is_empty(initialized_db): assert is_empty('devtable', 'somenewrepo') assert not is_empty('devtable', 'simple') + +@pytest.mark.skipif(os.environ.get('TEST_DATABASE_URI', '').find('mysql') >= 0, + reason='MySQL requires specialized indexing of newly created repos') +@pytest.mark.parametrize('query', [ + (''), + ('e'), +]) +@pytest.mark.parametrize('authed_username', [ + (None), + ('devtable'), +]) +def test_search_pagination(query, authed_username, initialized_db): + # Create some public repos. + repo1 = create_repository('devtable', 'somenewrepo', None, repo_kind='image', visibility='public') + repo2 = create_repository('devtable', 'somenewrepo2', None, repo_kind='image', visibility='public') + repo3 = create_repository('devtable', 'somenewrepo3', None, repo_kind='image', visibility='public') + + repositories = get_filtered_matching_repositories(query, filter_username=authed_username) + assert len(repositories) > 3 + + next_repos = get_filtered_matching_repositories(query, filter_username=authed_username, offset=1) + assert repositories[0].id != next_repos[0].id + assert repositories[1].id == next_repos[0].id