Merge pull request #615 from coreos-inc/queriesunite

Unionize the mega query - It needed more performance-based benefits
This commit is contained in:
josephschorr 2015-10-15 13:17:01 -04:00
commit 24b54f1e34
7 changed files with 145 additions and 54 deletions

View file

@ -25,7 +25,18 @@ def filter_to_repos_for_user(query, username=None, namespace=None, include_publi
if not include_public and not username:
return Repository.select().where(Repository.id == '-1')
where_clause = None
# Build a set of queries that, when unioned together, return the full set of visible repositories
# for the filters specified.
queries = []
where_clause = (True)
if namespace:
where_clause = (Namespace.username == namespace)
if include_public:
queries.append(query.clone()
.where(Repository.visibility == get_public_repo_visibility(), where_clause))
if username:
UserThroughTeam = User.alias()
Org = User.alias()
@ -33,37 +44,32 @@ def filter_to_repos_for_user(query, username=None, namespace=None, include_publi
AdminTeamMember = TeamMember.alias()
AdminUser = User.alias()
query = (query
.switch(RepositoryPermission)
.join(User, JOIN_LEFT_OUTER)
.switch(RepositoryPermission)
.join(Team, JOIN_LEFT_OUTER)
.join(TeamMember, JOIN_LEFT_OUTER)
.join(UserThroughTeam, JOIN_LEFT_OUTER, on=(UserThroughTeam.id == TeamMember.user))
.switch(Repository)
.join(Org, JOIN_LEFT_OUTER, on=(Repository.namespace_user == Org.id))
.join(AdminTeam, JOIN_LEFT_OUTER, on=(Org.id == AdminTeam.organization))
.join(TeamRole, JOIN_LEFT_OUTER, on=(AdminTeam.role == TeamRole.id))
.switch(AdminTeam)
.join(AdminTeamMember, JOIN_LEFT_OUTER, on=(AdminTeam.id == AdminTeamMember.team))
.join(AdminUser, JOIN_LEFT_OUTER, on=(AdminTeamMember.user == AdminUser.id)))
# Add repositories in which the user has permission.
queries.append(query.clone()
.switch(RepositoryPermission)
.join(User)
.where(User.username == username, where_clause))
where_clause = ((User.username == username) | (UserThroughTeam.username == username) |
((AdminUser.username == username) & (TeamRole.name == 'admin')))
# Add repositories in which the user is a member of a team that has permission.
queries.append(query.clone()
.switch(RepositoryPermission)
.join(Team)
.join(TeamMember)
.join(UserThroughTeam, on=(UserThroughTeam.id == TeamMember.user))
.where(UserThroughTeam.username == username, where_clause))
if namespace:
where_clause = where_clause & (Namespace.username == namespace)
# Add repositories under namespaces in which the user is the org admin.
queries.append(query.clone()
.switch(Repository)
.join(Org, on=(Repository.namespace_user == Org.id))
.join(AdminTeam, on=(Org.id == AdminTeam.organization))
.join(TeamRole, on=(AdminTeam.role == TeamRole.id))
.switch(AdminTeam)
.join(AdminTeamMember, on=(AdminTeam.id == AdminTeamMember.team))
.join(AdminUser, on=(AdminTeamMember.user == AdminUser.id))
.where(AdminUser.username == username, where_clause))
# TODO(jschorr, jake): Figure out why the old join on Visibility was so darn slow and
# remove this hack.
if include_public:
new_clause = (Repository.visibility == get_public_repo_visibility())
if where_clause:
where_clause = where_clause | new_clause
else:
where_clause = new_clause
return query.where(where_clause)
return reduce(lambda l, r: l | r, queries)
def get_user_organizations(username):

View file

@ -14,6 +14,10 @@ from data.database import (Repository, Namespace, RepositoryTag, Star, Image, Im
logger = logging.getLogger(__name__)
def get_public_repo_visibility():
return _basequery.get_public_repo_visibility()
def create_repository(namespace, name, creating_user, visibility='private'):
private = Visibility.get(name=visibility)
namespace_user = User.get(username=namespace)
@ -241,28 +245,9 @@ def get_visible_repositories(username, namespace=None, page=None, limit=None, in
if not include_public and not username:
return []
fields = [Repository.name, Repository.id, Repository.description, Visibility.name,
Namespace.username]
query = _visible_repository_query(username=username, page=page,
limit=limit, namespace=namespace, include_public=include_public,
select_models=fields)
if limit:
query = query.limit(limit)
if namespace:
query = query.where(Namespace.username == namespace)
return query
def _visible_repository_query(username=None, include_public=True, limit=None,
page=None, namespace=None, select_models=[]):
query = (Repository
.select(*select_models) # MySQL/RDS complains is there are selected models for counts.
.select(Repository.name, Repository.id, Repository.description, Namespace.username)
.distinct()
.join(Visibility)
.switch(Repository)
.join(Namespace, on=(Repository.namespace_user == Namespace.id))
.switch(Repository)

View file

@ -133,8 +133,7 @@ def list_repository_tag_history(repo_obj, page=1, size=100, specific_tag=None):
.join(Image)
.where(RepositoryTag.repository == repo_obj)
.where(RepositoryTag.hidden == False)
.order_by(RepositoryTag.lifetime_start_ts.desc())
.order_by(RepositoryTag.name)
.order_by(RepositoryTag.lifetime_start_ts.desc(), RepositoryTag.name)
.paginate(page, size))
if specific_tag:

View file

@ -496,12 +496,11 @@ def get_matching_user_namespaces(namespace_prefix, username, limit=10):
base_query = (Namespace
.select()
.distinct()
.limit(limit)
.join(Repository, on=(Repository.namespace_user == Namespace.id))
.join(RepositoryPermission, JOIN_LEFT_OUTER)
.where(Namespace.username ** (namespace_prefix + '%')))
return _basequery.filter_to_repos_for_user(base_query, username)
return _basequery.filter_to_repos_for_user(base_query, username).limit(limit)
def get_matching_users(username_prefix, robot_namespace=None,
organization=None):

View file

@ -203,7 +203,7 @@ class RepositoryList(ApiResource):
'namespace': repo_obj.namespace_user.username,
'name': repo_obj.name,
'description': repo_obj.description,
'is_public': repo_obj.visibility.name == 'public'
'is_public': repo_obj.visibility_id == model.repository.get_public_repo_visibility(),
}
repo_id = repo_obj.id

View file

@ -217,3 +217,7 @@ class TestImageSharing(unittest.TestCase):
still_uploading.save()
self.assertDifferentStorage('an-image', still_uploading)
if __name__ == '__main__':
unittest.main()

View file

@ -0,0 +1,98 @@
import unittest
from app import app
from initdb import setup_database_for_testing, finished_database_for_testing
from data import model
NO_ACCESS_USER = 'freshuser'
READ_ACCESS_USER = 'reader'
ADMIN_ACCESS_USER = 'devtable'
PUBLIC_USER = 'public'
RANDOM_USER = 'randomuser'
OUTSIDE_ORG_USER = 'outsideorg'
ADMIN_ROBOT_USER = 'devtable+dtrobot'
ORGANIZATION = 'buynlarge'
SIMPLE_REPO = 'simple'
PUBLIC_REPO = 'publicrepo'
RANDOM_REPO = 'randomrepo'
OUTSIDE_ORG_REPO = 'coolrepo'
ORG_REPO = 'orgrepo'
ANOTHER_ORG_REPO = 'anotherorgrepo'
# Note: The shared repo has devtable as admin, public as a writer and reader as a reader.
SHARED_REPO = 'shared'
class TestVisibleRepositories(unittest.TestCase):
def setUp(self):
setup_database_for_testing(self)
self.app = app.test_client()
self.ctx = app.test_request_context()
self.ctx.__enter__()
def tearDown(self):
finished_database_for_testing(self)
self.ctx.__exit__(True, None, None)
def assertDoesNotHaveRepo(self, username, name):
repos = list(model.repository.get_visible_repositories(username))
names = [repo.name for repo in repos]
self.assertNotIn(name, names)
def assertHasRepo(self, username, name):
repos = list(model.repository.get_visible_repositories(username))
names = [repo.name for repo in repos]
self.assertIn(name, names)
def test_noaccess(self):
repos = list(model.repository.get_visible_repositories(NO_ACCESS_USER))
names = [repo.name for repo in repos]
self.assertEquals(0, len(names))
# Try retrieving public repos now.
repos = list(model.repository.get_visible_repositories(NO_ACCESS_USER, include_public=True))
names = [repo.name for repo in repos]
self.assertIn(PUBLIC_REPO, names)
def test_public(self):
self.assertHasRepo(PUBLIC_USER, PUBLIC_REPO)
self.assertHasRepo(PUBLIC_USER, SHARED_REPO)
self.assertDoesNotHaveRepo(PUBLIC_USER, SIMPLE_REPO)
self.assertDoesNotHaveRepo(PUBLIC_USER, RANDOM_REPO)
self.assertDoesNotHaveRepo(PUBLIC_USER, OUTSIDE_ORG_REPO)
def test_reader(self):
self.assertHasRepo(READ_ACCESS_USER, SHARED_REPO)
self.assertHasRepo(READ_ACCESS_USER, ORG_REPO)
self.assertDoesNotHaveRepo(READ_ACCESS_USER, SIMPLE_REPO)
self.assertDoesNotHaveRepo(READ_ACCESS_USER, RANDOM_REPO)
self.assertDoesNotHaveRepo(READ_ACCESS_USER, OUTSIDE_ORG_REPO)
self.assertDoesNotHaveRepo(READ_ACCESS_USER, PUBLIC_REPO)
def test_random(self):
self.assertHasRepo(RANDOM_USER, RANDOM_REPO)
self.assertDoesNotHaveRepo(RANDOM_USER, SIMPLE_REPO)
self.assertDoesNotHaveRepo(RANDOM_USER, SHARED_REPO)
self.assertDoesNotHaveRepo(RANDOM_USER, ORG_REPO)
self.assertDoesNotHaveRepo(RANDOM_USER, ANOTHER_ORG_REPO)
self.assertDoesNotHaveRepo(RANDOM_USER, PUBLIC_REPO)
def test_admin(self):
self.assertHasRepo(ADMIN_ACCESS_USER, SIMPLE_REPO)
self.assertHasRepo(ADMIN_ACCESS_USER, SHARED_REPO)
self.assertHasRepo(ADMIN_ACCESS_USER, ORG_REPO)
self.assertHasRepo(ADMIN_ACCESS_USER, ANOTHER_ORG_REPO)
self.assertDoesNotHaveRepo(ADMIN_ACCESS_USER, OUTSIDE_ORG_REPO)
if __name__ == '__main__':
unittest.main()