Merge remote-tracking branch 'upstream/v2-phase4' into python-registry-v2

This commit is contained in:
Jake Moshenko 2015-10-22 16:59:28 -04:00
commit e7a6176594
105 changed files with 4439 additions and 2074 deletions

View file

@ -1,5 +1,6 @@
import redis
import json
import time
from util.dynamic import import_class
from datetime import timedelta
@ -65,7 +66,6 @@ class RedisBuildLogs(object):
"""
self._redis.expire(self._logs_key(build_id), ONE_DAY)
@staticmethod
def _status_key(build_id):
return 'builds/%s/status' % build_id
@ -88,9 +88,20 @@ class RedisBuildLogs(object):
return json.loads(fetched) if fetched else None
@staticmethod
def _health_key():
  # Fixed Redis key used only for health-check write/read round trips.
  return '_health'
def check_health(self):
  """ Returns True if Redis is reachable and can round-trip a key, False otherwise.

  A failed ping or a connection error yields False; any other exception
  propagates to the caller.
  """
  try:
    # Fix: the stale pre-change `return self._redis.ping() == True` line made
    # the write/read verification below unreachable; ping is now only a guard.
    if not self._redis.ping() == True:
      return False

    # Ensure we can write and read a key.
    self._redis.set(self._health_key(), time.time())
    self._redis.get(self._health_key())
    return True
  except redis.ConnectionError:
    return False

View file

@ -491,12 +491,8 @@ class EmailConfirmation(BaseModel):
class ImageStorage(BaseModel):
uuid = CharField(default=uuid_generator, index=True, unique=True)
checksum = CharField(null=True)
created = DateTimeField(null=True)
comment = TextField(null=True)
command = TextField(null=True)
image_size = BigIntegerField(null=True)
uncompressed_size = BigIntegerField(null=True)
aggregate_size = BigIntegerField(null=True)
uploading = BooleanField(default=True, null=True)
cas_path = BooleanField(default=True)

View file

@ -0,0 +1,32 @@
"""Remove the deprecated imagestorage columns.
Revision ID: 127905a52fdd
Revises: 2e0380215d01
Create Date: 2015-09-17 15:48:56.667823
"""
# revision identifiers, used by Alembic.
revision = '127905a52fdd'
down_revision = '2e0380215d01'
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import mysql
def upgrade(tables):
  # Drop the metadata columns from imagestorage; the preceding revision
  # (2e0380215d01) backfills these fields onto the image rows first.
  ### commands auto generated by Alembic - please adjust! ###
  op.drop_column('imagestorage', 'comment')
  op.drop_column('imagestorage', 'aggregate_size')
  op.drop_column('imagestorage', 'command')
  op.drop_column('imagestorage', 'created')
  ### end Alembic commands ###
def downgrade(tables):
  # Re-create the dropped columns (MySQL types). Only the schema is restored;
  # the columns come back nullable and empty, the data is not recovered.
  ### commands auto generated by Alembic - please adjust! ###
  op.add_column('imagestorage', sa.Column('created', mysql.DATETIME(), nullable=True))
  op.add_column('imagestorage', sa.Column('command', mysql.TEXT(), nullable=True))
  op.add_column('imagestorage', sa.Column('aggregate_size', mysql.BIGINT(display_width=20), autoincrement=False, nullable=True))
  op.add_column('imagestorage', sa.Column('comment', mysql.TEXT(), nullable=True))
  ### end Alembic commands ###

View file

@ -0,0 +1,24 @@
"""Backfill image fields from image storages
Revision ID: 2e0380215d01
Revises: 3ff4fbc94644
Create Date: 2015-09-15 16:57:42.850246
"""
# revision identifiers, used by Alembic.
revision = '2e0380215d01'
down_revision = '3ff4fbc94644'
from alembic import op
import sqlalchemy as sa
from util.migrate.backfill_image_fields import backfill_image_fields
from util.migrate.backfill_v1_metadata import backfill_v1_metadata
def upgrade(tables):
  # Data-only migration: backfill the image rows' fields from their storages,
  # then backfill the v1 JSON metadata (see util.migrate helpers).
  backfill_image_fields()
  backfill_v1_metadata()
def downgrade(tables):
  # The backfill only copies data into existing columns, so there is
  # nothing to undo; downgrade is intentionally a no-op.
  pass

View file

@ -1,10 +1,22 @@
from peewee import JOIN_LEFT_OUTER
from peewee import JOIN_LEFT_OUTER, Clause, SQL
from cachetools import lru_cache
from data.database import (Repository, User, Team, TeamMember, RepositoryPermission, TeamRole,
Namespace, Visibility, db_for_update)
def prefix_search(field, prefix_query):
  """ Returns the wildcard match for searching for the given prefix query. """
  # Escape the known wildcard characters; the escape character '!' itself
  # must be escaped first so later replacements are not double-escaped.
  escaped = prefix_query
  for wildcard, replacement in (('!', '!!'), ('%', '!%'), ('_', '!_'), ('[', '![')):
    escaped = escaped.replace(wildcard, replacement)

  return field ** Clause(escaped + '%', SQL("ESCAPE '!'"))
def get_existing_repository(namespace_name, repository_name, for_update=False):
query = (Repository
.select(Repository, Namespace)
@ -25,7 +37,18 @@ def filter_to_repos_for_user(query, username=None, namespace=None, include_publi
if not include_public and not username:
return Repository.select().where(Repository.id == '-1')
where_clause = None
# Build a set of queries that, when unioned together, return the full set of visible repositories
# for the filters specified.
queries = []
where_clause = (True)
if namespace:
where_clause = (Namespace.username == namespace)
if include_public:
queries.append(query.clone()
.where(Repository.visibility == get_public_repo_visibility(), where_clause))
if username:
UserThroughTeam = User.alias()
Org = User.alias()
@ -33,37 +56,32 @@ def filter_to_repos_for_user(query, username=None, namespace=None, include_publi
AdminTeamMember = TeamMember.alias()
AdminUser = User.alias()
query = (query
.switch(RepositoryPermission)
.join(User, JOIN_LEFT_OUTER)
.switch(RepositoryPermission)
.join(Team, JOIN_LEFT_OUTER)
.join(TeamMember, JOIN_LEFT_OUTER)
.join(UserThroughTeam, JOIN_LEFT_OUTER, on=(UserThroughTeam.id == TeamMember.user))
.switch(Repository)
.join(Org, JOIN_LEFT_OUTER, on=(Repository.namespace_user == Org.id))
.join(AdminTeam, JOIN_LEFT_OUTER, on=(Org.id == AdminTeam.organization))
.join(TeamRole, JOIN_LEFT_OUTER, on=(AdminTeam.role == TeamRole.id))
.switch(AdminTeam)
.join(AdminTeamMember, JOIN_LEFT_OUTER, on=(AdminTeam.id == AdminTeamMember.team))
.join(AdminUser, JOIN_LEFT_OUTER, on=(AdminTeamMember.user == AdminUser.id)))
# Add repositories in which the user has permission.
queries.append(query.clone()
.switch(RepositoryPermission)
.join(User)
.where(User.username == username, where_clause))
where_clause = ((User.username == username) | (UserThroughTeam.username == username) |
((AdminUser.username == username) & (TeamRole.name == 'admin')))
# Add repositories in which the user is a member of a team that has permission.
queries.append(query.clone()
.switch(RepositoryPermission)
.join(Team)
.join(TeamMember)
.join(UserThroughTeam, on=(UserThroughTeam.id == TeamMember.user))
.where(UserThroughTeam.username == username, where_clause))
if namespace:
where_clause = where_clause & (Namespace.username == namespace)
# Add repositories under namespaces in which the user is the org admin.
queries.append(query.clone()
.switch(Repository)
.join(Org, on=(Repository.namespace_user == Org.id))
.join(AdminTeam, on=(Org.id == AdminTeam.organization))
.join(TeamRole, on=(AdminTeam.role == TeamRole.id))
.switch(AdminTeam)
.join(AdminTeamMember, on=(AdminTeam.id == AdminTeamMember.team))
.join(AdminUser, on=(AdminTeamMember.user == AdminUser.id))
.where(AdminUser.username == username, where_clause))
# TODO(jschorr, jake): Figure out why the old join on Visibility was so darn slow and
# remove this hack.
if include_public:
new_clause = (Repository.visibility == get_public_repo_visibility())
if where_clause:
where_clause = where_clause | new_clause
else:
where_clause = new_clause
return query.where(where_clause)
return reduce(lambda l, r: l | r, queries)
def get_user_organizations(username):

View file

@ -79,11 +79,14 @@ def get_repository_images_base(namespace_name, repository_name, query_modifier):
.where(Repository.name == repository_name, Namespace.username == namespace_name))
query = query_modifier(query)
return _translate_placements_to_images_with_locations(query)
return invert_placement_query_results(query)
def _translate_placements_to_images_with_locations(query):
location_list = list(query)
def invert_placement_query_results(placement_query):
""" This method will take a query which returns placements, storages, and images, and have it
return images and their storages, along with the placement set on each storage.
"""
location_list = list(placement_query)
images = {}
for location in location_list:
@ -192,7 +195,12 @@ def _find_or_link_image(existing_image, repo_obj, username, translations, prefer
new_image = Image.create(docker_image_id=existing_image.docker_image_id,
repository=repo_obj, storage=copied_storage,
ancestors=new_image_ancestry)
ancestors=new_image_ancestry,
command=existing_image.command,
created=existing_image.created,
comment=existing_image.comment,
aggregate_size=existing_image.aggregate_size)
logger.debug('Storing translation %s -> %s', existing_image.id, new_image.id)
translations[existing_image.id] = new_image.id
@ -274,24 +282,15 @@ def set_image_metadata(docker_image_id, namespace_name, repository_name, created
# We cleanup any old checksum in case it's a retry after a fail
fetched.storage.checksum = None
now = datetime.now()
# TODO stop writing to storage when all readers are removed
fetched.storage.created = now
fetched.created = now
fetched.created = datetime.now()
if created_date_str is not None:
try:
# TODO stop writing to storage fields when all readers are removed
parsed_created_time = dateutil.parser.parse(created_date_str).replace(tzinfo=None)
fetched.created = parsed_created_time
fetched.storage.created = parsed_created_time
fetched.created = dateutil.parser.parse(created_date_str).replace(tzinfo=None)
except:
# parse raises different exceptions, so we cannot use a specific kind of handler here.
pass
# TODO stop writing to storage fields when all readers are removed
fetched.storage.comment = comment
fetched.storage.command = command
fetched.comment = comment
fetched.command = command
fetched.v1_json_metadata = v1_json_metadata
@ -304,6 +303,9 @@ def set_image_metadata(docker_image_id, namespace_name, repository_name, created
def set_image_size(docker_image_id, namespace_name, repository_name, image_size, uncompressed_size):
if image_size is None:
raise DataModelException('Empty image size field')
try:
image = (Image
.select(Image, ImageStorage)
@ -314,7 +316,6 @@ def set_image_size(docker_image_id, namespace_name, repository_name, image_size,
.where(Repository.name == repository_name, Namespace.username == namespace_name,
Image.docker_image_id == docker_image_id)
.get())
except Image.DoesNotExist:
raise DataModelException('No image with specified id and repository')
@ -326,21 +327,17 @@ def set_image_size(docker_image_id, namespace_name, repository_name, image_size,
try:
# TODO(jschorr): Switch to this faster route once we have full ancestor aggregate_size
# parent_image = Image.get(Image.id == ancestors[-1])
# total_size = image_size + parent_image.storage.aggregate_size
total_size = (ImageStorage
.select(fn.Sum(ImageStorage.image_size))
.join(Image)
.where(Image.id << ancestors)
.scalar()) + image_size
ancestor_size = (ImageStorage
.select(fn.Sum(ImageStorage.image_size))
.join(Image)
.where(Image.id << ancestors)
.scalar())
# TODO stop writing to storage when all readers are removed
image.storage.aggregate_size = total_size
image.aggregate_size = total_size
if ancestor_size is not None:
image.aggregate_size = ancestor_size + image_size
except Image.DoesNotExist:
pass
else:
# TODO stop writing to storage when all readers are removed
image.storage.aggregate_size = image_size
image.aggregate_size = image_size
image.storage.save()
@ -374,24 +371,6 @@ def get_repo_image_by_storage_checksum(namespace, repository_name, storage_check
raise InvalidImageException(msg)
def has_image_json(image):
  """ Returns whether a JSON definition exists for the given image, either inline
  on the image row (v1_json_metadata) or as a blob in storage.
  """
  if image.v1_json_metadata:
    # Fix: the original returned bool(<known-truthy value>); the result here is
    # always True, so say so directly. Also fixes the garbled docstring.
    return True

  store = config.store
  return store.exists(image.storage.locations, store.image_json_path(image.storage.uuid))
def get_image_json(image):
  """ Returns the JSON definition data for the image. """
  inline_metadata = image.v1_json_metadata
  if inline_metadata:
    # Metadata stored directly on the row wins.
    return inline_metadata

  # Otherwise fall back to loading the JSON blob from backing storage.
  store = config.store
  return store.get_content(image.storage.locations, store.image_json_path(image.storage.uuid))
def get_image_layers(image):
""" Returns a list of the full layers of an image, including itself (if specified), sorted
from base image outward. """

View file

@ -6,6 +6,7 @@ from cachetools import lru_cache
from data.database import LogEntry, LogEntryKind, User, db
# TODO: Find a way to get logs without slowing down pagination significantly.
def _logs_query(selections, start_time, end_time, performer=None, repository=None, namespace=None):
joined = (LogEntry
.select(*selections)

View file

@ -14,6 +14,10 @@ from data.database import (Repository, Namespace, RepositoryTag, Star, Image, Im
logger = logging.getLogger(__name__)
def get_public_repo_visibility():
  """ Returns the visibility value used for public repositories, resolved via
  the shared base-query helper so all call sites agree on it.
  """
  public_visibility = _basequery.get_public_repo_visibility()
  return public_visibility
def create_repository(namespace, name, creating_user, visibility='private'):
private = Visibility.get(name=visibility)
namespace_user = User.get(username=namespace)
@ -64,11 +68,7 @@ def purge_repository(namespace_name, repository_name):
fetched.delete_instance(recursive=True, delete_nullable=False)
def find_repository_with_garbage(filter_list=None):
# TODO(jschorr): Remove the filter once we have turned the experiment on for everyone.
if filter_list is not None and not filter_list:
return None
def find_repository_with_garbage():
epoch_timestamp = get_epoch_timestamp()
try:
@ -80,11 +80,9 @@ def find_repository_with_garbage(filter_list=None):
(RepositoryTag.lifetime_end_ts <=
(epoch_timestamp - Namespace.removed_tag_expiration_s)))
.limit(500)
.distinct()
.alias('candidates'))
if filter_list:
candidates = candidates.where(Namespace.username << filter_list)
found = (RepositoryTag
.select(candidates.c.repository_id)
.from_(candidates)
@ -102,11 +100,6 @@ def find_repository_with_garbage(filter_list=None):
def garbage_collect_repository(namespace_name, repository_name):
# If the namespace is the async experiment, don't perform garbage collection here.
# TODO(jschorr): Remove this check once we have turned the experiment on for everyone.
if namespace_name in config.app_config.get('EXP_ASYNC_GARBAGE_COLLECTION', []):
return
repo = get_repository(namespace_name, repository_name)
if repo is not None:
garbage_collect_repo(repo)
@ -247,28 +240,10 @@ def get_visible_repositories(username, namespace=None, page=None, limit=None, in
if not include_public and not username:
return []
fields = [Repository.name, Repository.id, Repository.description, Visibility.name,
Namespace.username]
query = _visible_repository_query(username=username, page=page,
limit=limit, namespace=namespace, include_public=include_public,
select_models=fields)
if limit:
query = query.limit(limit)
if namespace:
query = query.where(Namespace.username == namespace)
return query
def _visible_repository_query(username=None, include_public=True, limit=None,
page=None, namespace=None, select_models=[]):
query = (Repository
.select(*select_models) # MySQL/RDS complains is there are selected models for counts.
.select(Repository.name, Repository.id, Repository.description, Namespace.username,
Repository.visibility)
.distinct()
.join(Visibility)
.switch(Repository)
.join(Namespace, on=(Repository.namespace_user == Namespace.id))
.switch(Repository)
@ -338,36 +313,15 @@ def get_sorted_matching_repositories(prefix, only_public, checker, limit=10):
# For performance reasons, we conduct the repo name and repo namespace searches on their
# own. This also affords us the ability to give higher precedence to repository names matching
# over namespaces, which is semantically correct.
get_search_results(Repository.name ** (prefix + '%'), with_count=True)
get_search_results(Repository.name ** (prefix + '%'), with_count=False)
get_search_results(_basequery.prefix_search(Repository.name, prefix), with_count=True)
get_search_results(_basequery.prefix_search(Repository.name, prefix), with_count=False)
get_search_results(Namespace.username ** (prefix + '%'), with_count=True)
get_search_results(Namespace.username ** (prefix + '%'), with_count=False)
get_search_results(_basequery.prefix_search(Namespace.username, prefix), with_count=True)
get_search_results(_basequery.prefix_search(Namespace.username, prefix), with_count=False)
return results
def get_matching_repositories(repo_term, username=None, limit=10, include_public=True):
  """ Returns up to `limit` visible repositories whose name or namespace
  substring-matches the search term.
  """
  namespace_term = repo_term
  name_term = repo_term

  visible = _visible_repository_query(username, include_public=include_public)

  # Default: match the whole term against either the repo name OR the namespace.
  search_clauses = (Repository.name ** ('%' + name_term + '%') |
                    Namespace.username ** ('%' + namespace_term + '%'))

  # Handle the case where the user has already entered a namespace path.
  if repo_term.find('/') > 0:
    parts = repo_term.split('/', 1)
    namespace_term = '/'.join(parts[:-1])
    name_term = parts[-1]

    # With an explicit namespace, both halves must match (AND instead of OR).
    search_clauses = (Repository.name ** ('%' + name_term + '%') &
                      Namespace.username ** ('%' + namespace_term + '%'))

  return visible.where(search_clauses).limit(limit)
def lookup_repository(repo_id):
try:
return Repository.get(Repository.id == repo_id)

View file

@ -134,8 +134,7 @@ def list_repository_tag_history(repo_obj, page=1, size=100, specific_tag=None):
.join(Image)
.where(RepositoryTag.repository == repo_obj)
.where(RepositoryTag.hidden == False)
.order_by(RepositoryTag.lifetime_start_ts.desc())
.order_by(RepositoryTag.name)
.order_by(RepositoryTag.lifetime_start_ts.desc(), RepositoryTag.name)
.paginate(page, size))
if specific_tag:

View file

@ -137,12 +137,13 @@ def add_or_invite_to_team(inviter, team, user_obj=None, email=None, requires_inv
def get_matching_user_teams(team_prefix, user_obj, limit=10):
team_prefix_search = _basequery.prefix_search(Team.name, team_prefix)
query = (Team
.select()
.join(User)
.switch(Team)
.join(TeamMember)
.where(TeamMember.user == user_obj, Team.name ** (team_prefix + '%'))
.where(TeamMember.user == user_obj, team_prefix_search)
.distinct(Team.id)
.limit(limit))
@ -162,6 +163,7 @@ def get_organization_team(orgname, teamname):
def get_matching_admined_teams(team_prefix, user_obj, limit=10):
team_prefix_search = _basequery.prefix_search(Team.name, team_prefix)
admined_orgs = (_basequery.get_user_organizations(user_obj.username)
.switch(Team)
.join(TeamRole)
@ -172,7 +174,7 @@ def get_matching_admined_teams(team_prefix, user_obj, limit=10):
.join(User)
.switch(Team)
.join(TeamMember)
.where(Team.name ** (team_prefix + '%'), Team.organization << (admined_orgs))
.where(team_prefix_search, Team.organization << (admined_orgs))
.distinct(Team.id)
.limit(limit))
@ -180,8 +182,8 @@ def get_matching_admined_teams(team_prefix, user_obj, limit=10):
def get_matching_teams(team_prefix, organization):
  """ Returns up to 10 teams in the given organization whose name starts with
  the (wildcard-escaped) prefix.
  """
  # Fix: removed the stale pre-change query construction that duplicated the
  # lines below with an unescaped `Team.name ** (team_prefix + '%')` match.
  team_prefix_search = _basequery.prefix_search(Team.name, team_prefix)
  query = Team.select().where(team_prefix_search, Team.organization == organization)
  return query.limit(10)

View file

@ -203,9 +203,11 @@ def get_matching_robots(name_prefix, username, limit=10):
prefix_checks = False
for org in admined_orgs:
prefix_checks = prefix_checks | (User.username ** (org.username + '+' + name_prefix + '%'))
org_search = _basequery.prefix_search(User.username, org.username + '+' + name_prefix)
prefix_checks = prefix_checks | org_search
prefix_checks = prefix_checks | (User.username ** (username + '+' + name_prefix + '%'))
user_search = _basequery.prefix_search(User.username, username + '+' + name_prefix)
prefix_checks = prefix_checks | user_search
return User.select().where(prefix_checks).limit(limit)
@ -493,26 +495,25 @@ def get_user_or_org_by_customer_id(customer_id):
def get_matching_user_namespaces(namespace_prefix, username, limit=10):
  """ Returns up to `limit` namespaces starting with the (escaped) prefix that
  contain repositories visible to the given user.
  """
  namespace_search = _basequery.prefix_search(Namespace.username, namespace_prefix)

  # Fix: removed the stale pre-change lines (an early `.limit(limit)`, an
  # unescaped `Namespace.username ** ...` where-clause, and an unreachable
  # duplicate return) left over from the diff; the limit is applied once,
  # after filtering to the user's visible repositories.
  base_query = (Namespace
                .select()
                .distinct()
                .join(Repository, on=(Repository.namespace_user == Namespace.id))
                .join(RepositoryPermission, JOIN_LEFT_OUTER)
                .where(namespace_search))

  return _basequery.filter_to_repos_for_user(base_query, username).limit(limit)
def get_matching_users(username_prefix, robot_namespace=None,
organization=None):
direct_user_query = (User.username ** (username_prefix + '%') &
(User.organization == False) & (User.robot == False))
user_search = _basequery.prefix_search(User.username, username_prefix)
direct_user_query = (user_search & (User.organization == False) & (User.robot == False))
if robot_namespace:
robot_prefix = format_robot_username(robot_namespace, username_prefix)
direct_user_query = (direct_user_query |
(User.username ** (robot_prefix + '%') &
(User.robot == True)))
robot_search = _basequery.prefix_search(User.username, robot_prefix)
direct_user_query = (direct_user_query | (robot_search & (User.robot == True)))
query = (User
.select(User.username, User.email, User.robot)

View file

@ -67,22 +67,20 @@ class WorkQueue(object):
def _item_by_id_for_update(self, queue_id):
  # Fetches the single QueueItem row by id, wrapped in db_for_update —
  # presumably adds SELECT ... FOR UPDATE row locking; confirm in data.database.
  # Raises QueueItem.DoesNotExist (via .get()) when no row matches.
  return db_for_update(QueueItem.select().where(QueueItem.id == queue_id)).get()
def get_metrics(self, require_transaction=True):
guard = self._transaction_factory(db) if require_transaction else NoopWith()
with guard:
now = datetime.utcnow()
name_match_query = self._name_match_query()
def get_metrics(self):
now = datetime.utcnow()
name_match_query = self._name_match_query()
running_query = self._running_jobs(now, name_match_query)
running_count = running_query.distinct().count()
running_query = self._running_jobs(now, name_match_query)
running_count = running_query.distinct().count()
available_query = self._available_jobs(now, name_match_query)
available_count = available_query.select(QueueItem.queue_name).distinct().count()
available_query = self._available_jobs(now, name_match_query)
available_count = available_query.select(QueueItem.queue_name).distinct().count()
available_not_running_query = self._available_jobs_not_running(now, name_match_query,
running_query)
available_not_running_count = (available_not_running_query.select(QueueItem.queue_name)
.distinct().count())
available_not_running_query = self._available_jobs_not_running(now, name_match_query,
running_query)
available_not_running_count = (available_not_running_query.select(QueueItem.queue_name)
.distinct().count())
return (running_count, available_not_running_count, available_count)
@ -127,7 +125,10 @@ class WorkQueue(object):
params['available_after'] = available_date
with self._transaction_factory(db):
return str(QueueItem.create(**params).id)
r = str(QueueItem.create(**params).id)
if self._metric_queue:
self._metric_queue.put('Added', 1, dimensions={'queue': self._queue_name})
return r
def get(self, processing_time=300):
"""