From 7604e9842b0be442856214ec2e3d7a1f3da62261 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Tue, 19 Jun 2018 10:51:30 -0400 Subject: [PATCH] Change repo filtering for users to use a user ID reference, rather than the username While this means we need an additional query for initial lookup, it makes the *filtering* query (which is the heavy part) require far fewer joins, thus making it more efficient. Also adds a new unit test to verify that our filter filters to the correct set of repositories. --- data/model/_basequery.py | 34 ++++------ data/model/image.py | 11 +++- data/model/repository.py | 31 +++++++-- data/model/test/test_basequery.py | 104 ++++++++++++++++++++++++++++++ data/model/user.py | 5 +- endpoints/api/test/test_search.py | 5 +- test/test_api_usage.py | 2 +- 7 files changed, 158 insertions(+), 34 deletions(-) create mode 100644 data/model/test/test_basequery.py diff --git a/data/model/_basequery.py b/data/model/_basequery.py index 571b8ac9b..4ccd2fca7 100644 --- a/data/model/_basequery.py +++ b/data/model/_basequery.py @@ -69,9 +69,9 @@ def _lookup_team_roles(): return {role.name:role for role in TeamRole.select()} -def filter_to_repos_for_user(query, username=None, namespace=None, repo_kind='image', +def filter_to_repos_for_user(query, user_id=None, namespace=None, repo_kind='image', include_public=True, start_id=None): - if not include_public and not username: + if not include_public and not user_id: return Repository.select().where(Repository.id == '-1') # Filter on the type of repository. @@ -85,32 +85,28 @@ def filter_to_repos_for_user(query, username=None, namespace=None, repo_kind='im if start_id is not None: query = query.where(Repository.id >= start_id) + # Add a namespace filter if necessary. + if namespace: + query = query.where(Namespace.username == namespace) + # Build a set of queries that, when unioned together, return the full set of visible repositories # for the filters specified. queries = [] - where_clause = (True) - if namespace: - where_clause = (Namespace.username == namespace) - if include_public: queries.append(query .clone() - .where(Repository.visibility == get_public_repo_visibility(), where_clause)) + .where(Repository.visibility == get_public_repo_visibility())) - if username: - UserThroughTeam = User.alias() - Org = User.alias() + if user_id is not None: AdminTeam = Team.alias() AdminTeamMember = TeamMember.alias() - AdminUser = User.alias() # Add repositories in which the user has permission. queries.append(query .clone() .switch(RepositoryPermission) - .join(User) - .where(User.username == username, where_clause)) + .where(RepositoryPermission.user == user_id)) # Add repositories in which the user is a member of a team that has permission. queries.append(query @@ -118,20 +114,16 @@ def filter_to_repos_for_user(query, username=None, namespace=None, repo_kind='im .switch(RepositoryPermission) .join(Team) .join(TeamMember) - .join(UserThroughTeam, on=(UserThroughTeam.id == TeamMember.user)) - .where(UserThroughTeam.username == username, where_clause)) + .where(TeamMember.user == user_id)) # Add repositories under namespaces in which the user is the org admin. queries.append(query .clone() .switch(Repository) - .join(Org, on=(Repository.namespace_user == Org.id)) - .join(AdminTeam, on=(Org.id == AdminTeam.organization)) - .where(AdminTeam.role == _lookup_team_role('admin')) - .switch(AdminTeam) + .join(AdminTeam, on=(Repository.namespace_user == AdminTeam.organization)) .join(AdminTeamMember, on=(AdminTeam.id == AdminTeamMember.team)) - .join(AdminUser, on=(AdminTeamMember.user == AdminUser.id)) - .where(AdminUser.username == username, where_clause)) + .where(AdminTeam.role == _lookup_team_role('admin')) + .where(AdminTeamMember.user == user_id)) return reduce(lambda l, r: l | r, queries) diff --git a/data/model/image.py b/data/model/image.py index 9832e8a0d..d9b8c9c0e 100644 --- a/data/model/image.py +++ b/data/model/image.py @@ -12,12 +12,18 @@ from data.model import (DataModelException, db_transaction, _basequery, storage, InvalidImageException) from data.database import (Image, Repository, ImageStoragePlacement, Namespace, ImageStorage, ImageStorageLocation, RepositoryPermission, DerivedStorageForImage, - ImageStorageTransformation) + ImageStorageTransformation, User) from util.canonicaljson import canonicalize logger = logging.getLogger(__name__) +def _namespace_id_for_username(username): + try: + return User.get(username=username).id + except User.DoesNotExist: + return None + def get_image_with_storage(docker_image_id, storage_uuid): """ Returns the image with the given docker image ID and storage uuid or None if none. @@ -273,7 +279,8 @@ def find_create_or_link_image(docker_image_id, repo_obj, username, translations, .where(ImageStorage.uploading == False, Image.docker_image_id == docker_image_id)) - existing_image_query = _basequery.filter_to_repos_for_user(existing_image_query, username) + existing_image_query = _basequery.filter_to_repos_for_user(existing_image_query, + _namespace_id_for_username(username)) # If there is an existing image, we try to translate its ancestry and copy its storage. new_image = None diff --git a/data/model/repository.py b/data/model/repository.py index c9b640703..a94a52562 100644 --- a/data/model/repository.py +++ b/data/model/repository.py @@ -403,11 +403,17 @@ def get_visible_repositories(username, namespace=None, kind_filter='image', incl Namespace.username, Repository.visibility, Repository.kind) .switch(Repository).join(Namespace, on=(Repository.namespace_user == Namespace.id))) + user_id = None if username: # Note: We only need the permissions table if we will filter based on a user's permissions. query = query.switch(Repository).distinct().join(RepositoryPermission, JOIN_LEFT_OUTER) + found_namespace = _get_namespace_user(username) + if not found_namespace: + return Repository.select(Repository.id.alias('rid')).where(Repository.id == -1) - query = _basequery.filter_to_repos_for_user(query, username, namespace, kind_filter, + user_id = found_namespace.id + + query = _basequery.filter_to_repos_for_user(query, user_id, namespace, kind_filter, include_public, start_id=start_id) if limit is not None: @@ -434,6 +440,13 @@ def get_app_search(lookup, search_fields=None, username=None, limit=50): offset=0, limit=limit) +def _get_namespace_user(username): + try: + return User.get(username=username) + except User.DoesNotExist: + return None + + def get_filtered_matching_repositories(lookup_value, filter_username=None, repo_kind='image', offset=0, limit=25, search_fields=None): """ Returns an iterator of all repositories matching the given lookup value, with optional @@ -451,8 +464,12 @@ def get_filtered_matching_repositories(lookup_value, filter_username=None, repo_ # Add a filter to the iterator, if necessary. if filter_username is not None: - iterator = _filter_repositories_visible_to_username(unfiltered_query, filter_username, limit, - repo_kind) + filter_user = _get_namespace_user(filter_username) + if filter_user is None: + return [] + + iterator = _filter_repositories_visible_to_user(unfiltered_query, filter_user.id, limit, + repo_kind) if offset > 0: take(offset, iterator) @@ -462,7 +479,7 @@ def get_filtered_matching_repositories(lookup_value, filter_username=None, repo_ return list(unfiltered_query.offset(offset).limit(limit)) -def _filter_repositories_visible_to_username(unfiltered_query, filter_username, limit, repo_kind): +def _filter_repositories_visible_to_user(unfiltered_query, filter_user_id, limit, repo_kind): encountered = set() chunk_count = limit * 2 unfiltered_page = 0 @@ -484,11 +501,13 @@ def _filter_repositories_visible_to_username(unfiltered_query, filter_username, encountered.update(new_unfiltered_ids) # Filter the repositories found to only those visible to the current user. - query = (Repository.select(Repository, Namespace).distinct() + query = (Repository + .select(Repository, Namespace) + .distinct() .join(Namespace, on=(Namespace.id == Repository.namespace_user)).switch(Repository) .join(RepositoryPermission).where(Repository.id << list(new_unfiltered_ids))) - filtered = _basequery.filter_to_repos_for_user(query, filter_username, repo_kind=repo_kind) + filtered = _basequery.filter_to_repos_for_user(query, filter_user_id, repo_kind=repo_kind) # Sort the filtered repositories by their initial order. all_filtered_repos = list(filtered) diff --git a/data/model/test/test_basequery.py b/data/model/test/test_basequery.py new file mode 100644 index 000000000..a00d7c7f5 --- /dev/null +++ b/data/model/test/test_basequery.py @@ -0,0 +1,104 @@ +import pytest + +from peewee import JOIN_LEFT_OUTER +from playhouse.test_utils import assert_query_count + +from data.database import Repository, RepositoryPermission, TeamMember, Namespace +from data.model._basequery import filter_to_repos_for_user +from data.model.organization import get_admin_users +from data.model.user import get_namespace_user +from util.names import parse_robot_username + +from test.fixtures import * + +def _is_team_member(team, user): + return user.id in [member.user_id for member in + TeamMember.select().where(TeamMember.team == team)] + +def _get_visible_repositories_for_user(user, repo_kind='image', include_public=False, + namespace=None): + """ Returns all repositories directly visible to the given user, by either repo permission, + or the user being the admin of a namespace. + """ + for repo in Repository.select(): + if repo_kind is not None and repo.kind.name != repo_kind: + continue + + if namespace is not None and repo.namespace_user.username != namespace: + continue + + if include_public and repo.visibility.name == 'public': + yield repo + continue + + # Direct repo permission. + try: + RepositoryPermission.get(repository=repo, user=user).get() + yield repo + continue + except RepositoryPermission.DoesNotExist: + pass + + # Team permission. + found_in_team = False + for perm in RepositoryPermission.select().where(RepositoryPermission.repository == repo): + if perm.team and _is_team_member(perm.team, user): + found_in_team = True + break + + if found_in_team: + yield repo + continue + + # Org namespace admin permission. + if user in get_admin_users(repo.namespace_user): + yield repo + continue + + +@pytest.mark.parametrize('username', [ + 'devtable', + 'devtable+dtrobot', + 'public', + 'reader', +]) +@pytest.mark.parametrize('include_public', [ + True, + False +]) +@pytest.mark.parametrize('filter_to_namespace', [ + True, + False +]) +@pytest.mark.parametrize('repo_kind', [ + None, + 'image', + 'application', +]) +def test_filter_repositories(username, include_public, filter_to_namespace, repo_kind, + initialized_db): + namespace = username if filter_to_namespace else None + if '+' in username and filter_to_namespace: + namespace, _ = parse_robot_username(username) + + user = get_namespace_user(username) + query = (Repository + .select() + .distinct() + .join(Namespace, on=(Repository.namespace_user == Namespace.id)) + .switch(Repository) + .join(RepositoryPermission, JOIN_LEFT_OUTER)) + + with assert_query_count(1): + found = list(filter_to_repos_for_user(query, user.id, + namespace=namespace, + include_public=include_public, + repo_kind=repo_kind)) + + expected = list(_get_visible_repositories_for_user(user, + repo_kind=repo_kind, + namespace=namespace, + include_public=include_public)) + + assert len(found) == len(expected) + assert {r.id for r in found} == {r.id for r in expected} diff --git a/data/model/user.py b/data/model/user.py index 836ec9c20..b19623cb8 100644 --- a/data/model/user.py +++ b/data/model/user.py @@ -668,6 +668,9 @@ def invalidate_all_sessions(user): user.save() def get_matching_user_namespaces(namespace_prefix, username, limit=10): + namespace_user = get_namespace_user(username) + namespace_user_id = namespace_user.id if namespace_user is not None else None + namespace_search = prefix_search(Namespace.username, namespace_prefix) base_query = (Namespace .select() @@ -676,7 +679,7 @@ def get_matching_user_namespaces(namespace_prefix, username, limit=10): .join(RepositoryPermission, JOIN_LEFT_OUTER) .where(namespace_search)) - return _basequery.filter_to_repos_for_user(base_query, username).limit(limit) + return _basequery.filter_to_repos_for_user(base_query, namespace_user_id).limit(limit) def get_matching_users(username_prefix, robot_namespace=None, organization=None, limit=20, exact_matches_only=False): diff --git a/endpoints/api/test/test_search.py b/endpoints/api/test/test_search.py index 15fbc1864..b19efa8f3 100644 --- a/endpoints/api/test/test_search.py +++ b/endpoints/api/test/test_search.py @@ -2,7 +2,6 @@ import pytest from playhouse.test_utils import assert_query_count -from data.model import _basequery from endpoints.api.search import ConductRepositorySearch, ConductSearch from endpoints.api.test.shared import conduct_api_call from endpoints.test.shared import client_with_identity @@ -17,7 +16,7 @@ from test.fixtures import * def test_repository_search(query, client): with client_with_identity('devtable', client) as cl: params = {'query': query} - with assert_query_count(6): + with assert_query_count(7): result = conduct_api_call(cl, ConductRepositorySearch, 'GET', params, None, 200).json assert result['start_index'] == 0 assert result['page'] == 1 @@ -32,6 +31,6 @@ def test_repository_search(query, client): def test_search_query_count(query, client): with client_with_identity('devtable', client) as cl: params = {'query': query} - with assert_query_count(8): + with assert_query_count(10): result = conduct_api_call(cl, ConductSearch, 'GET', params, None, 200).json assert len(result['results']) diff --git a/test/test_api_usage.py b/test/test_api_usage.py index 74afe4c93..0caa03cb8 100644 --- a/test/test_api_usage.py +++ b/test/test_api_usage.py @@ -1858,7 +1858,7 @@ class TestListRepos(ApiTestCase): self.login(ADMIN_ACCESS_USER) # Queries: Base + the list query + the popularity and last modified queries + full perms load - with assert_query_count(BASE_LOGGEDIN_QUERY_COUNT + 4): + with assert_query_count(BASE_LOGGEDIN_QUERY_COUNT + 5): json = self.getJsonResponse(RepositoryList, params=dict(namespace=ORGANIZATION, public=False, last_modified=True, popularity=True))