From e8cb359d968df435f65ff6bb756b075dcfdb213d Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Wed, 7 Oct 2015 10:00:12 -0700 Subject: [PATCH] Unionize the mega query - It needed more performance-based benefits --- data/model/_basequery.py | 64 +++++++++++++++++++++----------------- data/model/repository.py | 3 -- data/model/tag.py | 3 +- data/model/user.py | 3 +- test/test_image_sharing.py | 4 +++ 5 files changed, 41 insertions(+), 36 deletions(-) diff --git a/data/model/_basequery.py b/data/model/_basequery.py index 131f860e7..054e00eb5 100644 --- a/data/model/_basequery.py +++ b/data/model/_basequery.py @@ -25,7 +25,18 @@ def filter_to_repos_for_user(query, username=None, namespace=None, include_publi if not include_public and not username: return Repository.select().where(Repository.id == '-1') - where_clause = None + # Build a set of queries that, when unioned together, return the full set of visible repositories + # for the filters specified. + queries = [] + + where_clause = (True) + if namespace: + where_clause = (Namespace.username == namespace) + + if include_public: + queries.append(query.clone() + .where(Repository.visibility == get_public_repo_visibility(), where_clause)) + if username: UserThroughTeam = User.alias() Org = User.alias() @@ -33,37 +44,32 @@ def filter_to_repos_for_user(query, username=None, namespace=None, include_publi AdminTeamMember = TeamMember.alias() AdminUser = User.alias() - query = (query - .switch(RepositoryPermission) - .join(User, JOIN_LEFT_OUTER) - .switch(RepositoryPermission) - .join(Team, JOIN_LEFT_OUTER) - .join(TeamMember, JOIN_LEFT_OUTER) - .join(UserThroughTeam, JOIN_LEFT_OUTER, on=(UserThroughTeam.id == TeamMember.user)) - .switch(Repository) - .join(Org, JOIN_LEFT_OUTER, on=(Repository.namespace_user == Org.id)) - .join(AdminTeam, JOIN_LEFT_OUTER, on=(Org.id == AdminTeam.organization)) - .join(TeamRole, JOIN_LEFT_OUTER, on=(AdminTeam.role == TeamRole.id)) - .switch(AdminTeam) - .join(AdminTeamMember, JOIN_LEFT_OUTER, on=(AdminTeam.id == AdminTeamMember.team)) - .join(AdminUser, JOIN_LEFT_OUTER, on=(AdminTeamMember.user == AdminUser.id))) + # Add repositories in which the user has permission. + queries.append(query.clone() + .switch(RepositoryPermission) + .join(User) + .where(User.username == username, where_clause)) - where_clause = ((User.username == username) | (UserThroughTeam.username == username) | - ((AdminUser.username == username) & (TeamRole.name == 'admin'))) + # Add repositories in which the user is a member of a team that has permission. + queries.append(query.clone() + .switch(RepositoryPermission) + .join(Team) + .join(TeamMember) + .join(UserThroughTeam, on=(UserThroughTeam.id == TeamMember.user)) + .where(UserThroughTeam.username == username, where_clause)) - if namespace: - where_clause = where_clause & (Namespace.username == namespace) + # Add repositories under namespaces in which the user is the org admin. + queries.append(query.clone() + .switch(Repository) + .join(Org, on=(Repository.namespace_user == Org.id)) + .join(AdminTeam, on=(Org.id == AdminTeam.organization)) + .join(TeamRole, on=(AdminTeam.role == TeamRole.id)) + .switch(AdminTeam) + .join(AdminTeamMember, on=(AdminTeam.id == AdminTeamMember.team)) + .join(AdminUser, on=(AdminTeamMember.user == AdminUser.id)) + .where(AdminUser.username == username, where_clause)) - # TODO(jschorr, jake): Figure out why the old join on Visibility was so darn slow and - # remove this hack. - if include_public: - new_clause = (Repository.visibility == get_public_repo_visibility()) - if where_clause: - where_clause = where_clause | new_clause - else: - where_clause = new_clause - - return query.where(where_clause) + return reduce(lambda l, r: l | r, queries) def get_user_organizations(username): diff --git a/data/model/repository.py b/data/model/repository.py index 4a732d346..d5668b186 100644 --- a/data/model/repository.py +++ b/data/model/repository.py @@ -251,9 +251,6 @@ def get_visible_repositories(username, namespace=None, page=None, limit=None, in if limit: query = query.limit(limit) - if namespace: - query = query.where(Namespace.username == namespace) - return query diff --git a/data/model/tag.py b/data/model/tag.py index 5875c95b1..cf6ddf3c6 100644 --- a/data/model/tag.py +++ b/data/model/tag.py @@ -133,8 +133,7 @@ def list_repository_tag_history(repo_obj, page=1, size=100, specific_tag=None): .join(Image) .where(RepositoryTag.repository == repo_obj) .where(RepositoryTag.hidden == False) - .order_by(RepositoryTag.lifetime_start_ts.desc()) - .order_by(RepositoryTag.name) + .order_by(RepositoryTag.lifetime_start_ts.desc(), RepositoryTag.name) .paginate(page, size)) if specific_tag: diff --git a/data/model/user.py b/data/model/user.py index 1a7709ec7..29a2d88c1 100644 --- a/data/model/user.py +++ b/data/model/user.py @@ -496,12 +496,11 @@ def get_matching_user_namespaces(namespace_prefix, username, limit=10): base_query = (Namespace .select() .distinct() - .limit(limit) .join(Repository, on=(Repository.namespace_user == Namespace.id)) .join(RepositoryPermission, JOIN_LEFT_OUTER) .where(Namespace.username ** (namespace_prefix + '%'))) - return _basequery.filter_to_repos_for_user(base_query, username) + return _basequery.filter_to_repos_for_user(base_query, username).limit(limit) def get_matching_users(username_prefix, robot_namespace=None, organization=None): diff --git a/test/test_image_sharing.py b/test/test_image_sharing.py index d932f6dcc..b72b88875 100644 --- a/test/test_image_sharing.py +++ b/test/test_image_sharing.py @@ -217,3 +217,7 @@ class TestImageSharing(unittest.TestCase): still_uploading.save() self.assertDifferentStorage('an-image', still_uploading) + + +if __name__ == '__main__': + unittest.main()