Merge remote-tracking branch 'origin/master' into nomenclature

Conflicts:
	endpoints/common.py
	endpoints/notificationhelper.py
	test/data/test.db
	workers/dockerfilebuild.py
Jake Moshenko 2014-10-23 13:25:37 -04:00
commit 1461310ab8
200 changed files with 240935 additions and 798 deletions

@@ -7,9 +7,9 @@ from datetime import datetime
from peewee import *
from data.read_slave import ReadSlaveModel
from sqlalchemy.engine.url import make_url
from urlparse import urlparse
from util.names import urn_generator
logger = logging.getLogger(__name__)
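
For context: make_url (from SQLAlchemy) and urlparse are the standard tools for pulling connection parameters out of a database DSN string, presumably used here for the read-slave configuration. A tiny illustration, with a made-up connection string:

from sqlalchemy.engine.url import make_url

url = make_url('mysql://user:secret@dbhost/quay')  # illustrative DSN, not from this repo
print url.host, url.database                       # -> dbhost quay
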
@@ -80,6 +80,16 @@ def uuid_generator():
  return str(uuid.uuid4())

+def close_db_filter(_):
+  if not db.is_closed():
+    logger.debug('Disconnecting from database.')
+    db.close()
+
+  if read_slave.obj is not None and not read_slave.is_closed():
+    logger.debug('Disconnecting from read slave.')
+    read_slave.close()
+
class BaseModel(ReadSlaveModel):
  class Meta:
    database = db
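
A teardown hook like close_db_filter only has an effect once it is registered with the web framework; the registration site is not part of this diff. A minimal sketch, assuming a Flask app object:

# Hypothetical wiring -- the actual registration point lives elsewhere in the tree.
from flask import Flask

app = Flask(__name__)
app.teardown_request(close_db_filter)  # runs after every request, releasing both connections
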

@@ -21,6 +21,10 @@ def upgrade(tables):
        sa.PrimaryKeyConstraint('id', name=op.f('pk_imagestoragetransformation'))
    )
    op.create_index('imagestoragetransformation_name', 'imagestoragetransformation', ['name'], unique=True)
+   op.bulk_insert(tables.imagestoragetransformation,
+                  [
+                    {'id':1, 'name':'squash'},
+                  ])
    op.create_table('derivedimagestorage',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('source_id', sa.Integer(), nullable=True),
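
The hunk only shows upgrade(); the matching downgrade() is not visible in this commit. A plausible inverse, assuming nothing else references these tables, would drop them in reverse dependency order:

def downgrade(tables):
    # Sketch of the inverse migration (assumed, not shown in this diff):
    op.drop_table('derivedimagestorage')
    op.drop_index('imagestoragetransformation_name', table_name='imagestoragetransformation')
    op.drop_table('imagestoragetransformation')
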

@@ -13,7 +13,8 @@ from data.database import (User, Repository, Image, AccessToken, Role, Repositor
                            Notification, ImageStorageLocation, ImageStoragePlacement,
                            ExternalNotificationEvent, ExternalNotificationMethod,
                            RepositoryNotification, RepositoryAuthorizedEmail, TeamMemberInvite,
-                           DerivedImageStorage, random_string_generator, db, BUILD_PHASE)
+                           DerivedImageStorage, ImageStorageTransformation, random_string_generator,
+                           db, BUILD_PHASE)
from peewee import JOIN_LEFT_OUTER, fn
from util.validation import (validate_username, validate_email, validate_password,
                             INVALID_PASSWORD_MESSAGE)
@@ -578,6 +579,13 @@ def get_user(username):
    return None


+def get_namespace_user(username):
+  try:
+    return User.get(User.username == username)
+  except User.DoesNotExist:
+    return None
+
+
def get_user_or_org(username):
  try:
    return User.get(User.username == username, User.robot == False)
@@ -1247,6 +1255,20 @@ def find_or_create_derived_storage(source, transformation_name, preferred_locati
  return new_storage


+def delete_derived_storage_by_uuid(storage_uuid):
+  try:
+    image_storage = get_storage_by_uuid(storage_uuid)
+  except InvalidImageException:
+    return
+
+  try:
+    DerivedImageStorage.get(derivative=image_storage)
+  except DerivedImageStorage.DoesNotExist:
+    return
+
+  image_storage.delete_instance(recursive=True)
+
+
def get_storage_by_uuid(storage_uuid):
  placements = list(ImageStoragePlacement
                    .select(ImageStoragePlacement, ImageStorage, ImageStorageLocation)
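
Note that delete_derived_storage_by_uuid is deliberately forgiving: it returns quietly when the UUID is unknown or when the storage is not the derivative side of a DerivedImageStorage row, so only true derivatives are ever recursively deleted. A hedged usage sketch (names and UUIDs are illustrative):

delete_derived_storage_by_uuid('no-such-uuid')         # unknown UUID: silently a no-op
delete_derived_storage_by_uuid(base_storage.uuid)      # not a derivative: also a no-op
delete_derived_storage_by_uuid(squashed_storage.uuid)  # a derivative: row and placements removed
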
@@ -1305,7 +1327,15 @@ def set_image_metadata(docker_image_id, namespace_name, repository_name, created

    # We cleanup any old checksum in case it's a retry after a fail
    fetched.storage.checksum = None
-   fetched.storage.created = dateutil.parser.parse(created_date_str).replace(tzinfo=None)
+   fetched.storage.created = datetime.now()
+   if created_date_str is not None:
+     try:
+       fetched.storage.created = dateutil.parser.parse(created_date_str).replace(tzinfo=None)
+     except:
+       # parse raises different exceptions, so we cannot use a specific kind of handler here.
+       pass
+
    fetched.storage.comment = comment
    fetched.storage.command = command
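
The bare except above intentionally swallows any parse failure and keeps the datetime.now() default. dateutil.parser.parse raises ValueError for unparseable strings, OverflowError for out-of-range dates, and TypeError for non-string input, so a narrower handler is possible if masking unrelated bugs is a concern; a sketch of that variant:

if created_date_str is not None:
  try:
    fetched.storage.created = dateutil.parser.parse(created_date_str).replace(tzinfo=None)
  except (ValueError, OverflowError, TypeError):
    # Keep the datetime.now() default assigned above.
    pass
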
@@ -1390,48 +1420,87 @@ def garbage_collect_repository(namespace_name, repository_name):
  all_images = {int(img.id): img for img in all_repo_images}
  to_remove = set(all_images.keys()).difference(referenced_anscestors)

-  logger.info('Cleaning up unreferenced images: %s', to_remove)
-
-  uuids_to_check_for_gc = set()
-  for image_id_to_remove in to_remove:
-    image_to_remove = all_images[image_id_to_remove]
-
-    logger.debug('Adding image storage to the gc list: %s',
-                 image_to_remove.storage.uuid)
-    uuids_to_check_for_gc.add(image_to_remove.storage.uuid)
-
-    image_to_remove.delete_instance()
-
-  def remove_storages(query):
-    for storage in query:
-      logger.debug('Garbage collecting image storage: %s', storage.uuid)
-      image_path = config.store.image_path(storage.uuid)
-      for placement in storage.imagestorageplacement_set:
-        location_name = placement.location.name
-        placement.delete_instance()
-        config.store.remove({location_name}, image_path)
-      storage.delete_instance(recursive=True)
-
-  if uuids_to_check_for_gc:
-    storage_to_remove = (ImageStorage
-                         .select()
-                         .join(Image, JOIN_LEFT_OUTER)
-                         .group_by(ImageStorage)
-                         .where(ImageStorage.uuid << list(uuids_to_check_for_gc))
-                         .having(fn.Count(Image.id) == 0))
-
-    remove_storages(storage_to_remove)
-
-    # Now remove any derived image storages whose sources have been removed
-    derived_storages_to_remove = (ImageStorage
-                                  .select()
-                                  .join(DerivedImageStorage, on=(ImageStorage.id == DerivedImageStorage.derivative))
-                                  .where(DerivedImageStorage.source >> None))
-    remove_storages(derived_storages_to_remove)
-
-  return len(to_remove)
+  if len(to_remove) > 0:
+    logger.info('Cleaning up unreferenced images: %s', to_remove)
+    storage_id_whitelist = {all_images[to_remove_id].storage.id for to_remove_id in to_remove}
+    Image.delete().where(Image.id << list(to_remove)).execute()
+    garbage_collect_storage(storage_id_whitelist)
+
+  return len(to_remove)
+
+
+def garbage_collect_storage(storage_id_whitelist):
+  # We are going to make the conscious decision to not delete image storage inside the transaction
+  # This may end up producing garbage in s3, trading off for higher availability in the database
+
+  def placements_query_to_paths_set(placements_query):
+    return {(placement.location.name, config.store.image_path(placement.storage.uuid))
+            for placement in placements_query}
+
+  def orphaned_storage_query(select_base_query, candidates):
+    return (select_base_query
+            .switch(ImageStorage)
+            .join(Image, JOIN_LEFT_OUTER)
+            .switch(ImageStorage)
+            .join(DerivedImageStorage, JOIN_LEFT_OUTER,
+                  on=(ImageStorage.id == DerivedImageStorage.derivative))
+            .where(ImageStorage.id << list(candidates))
+            .group_by(ImageStorage)
+            .having((fn.Count(Image.id) == 0) & (fn.Count(DerivedImageStorage.id) == 0)))
+
+  logger.debug('Garbage collecting storage from candidates: %s', storage_id_whitelist)
+  with config.app_config['DB_TRANSACTION_FACTORY'](db):
+    # Find out which derived storages will be removed, and add them to the whitelist
+    orphaned_from_candidates = list(orphaned_storage_query(ImageStorage.select(ImageStorage.id),
+                                                           storage_id_whitelist))
+
+    if len(orphaned_from_candidates) > 0:
+      derived_to_remove = (ImageStorage
+                           .select(ImageStorage.id)
+                           .join(DerivedImageStorage,
+                                 on=(ImageStorage.id == DerivedImageStorage.derivative))
+                           .where(DerivedImageStorage.source << orphaned_from_candidates))
+      storage_id_whitelist.update({derived.id for derived in derived_to_remove})
+
+      # Remove the derived image storages with sources of orphaned storages
+      (DerivedImageStorage
+       .delete()
+       .where(DerivedImageStorage.source << orphaned_from_candidates)
+       .execute())
+
+    # Track all of the data that should be removed from blob storage
+    placements_to_remove = orphaned_storage_query(ImageStoragePlacement
+                                                  .select(ImageStoragePlacement,
+                                                          ImageStorage,
+                                                          ImageStorageLocation)
+                                                  .join(ImageStorageLocation)
+                                                  .switch(ImageStoragePlacement)
+                                                  .join(ImageStorage),
+                                                  storage_id_whitelist)
+    paths_to_remove = placements_query_to_paths_set(placements_to_remove.clone())
+
+    # Remove the placements for orphaned storages
+    placements_subquery = list(placements_to_remove.clone().select(ImageStoragePlacement.id))
+    if len(placements_subquery) > 0:
+      (ImageStoragePlacement
+       .delete()
+       .where(ImageStoragePlacement.id << list(placements_subquery))
+       .execute())
+
+    # Remove all the orphaned storages
+    orphaned_storages = list(orphaned_storage_query(ImageStorage.select(ImageStorage.id),
+                                                    storage_id_whitelist))
+    if len(orphaned_storages) > 0:
+      (ImageStorage
+       .delete()
+       .where(ImageStorage.id << orphaned_storages)
+       .execute())
+
+  # Delete the actual blob storage
+  for location_name, image_path in paths_to_remove:
+    logger.debug('Removing %s from %s', image_path, location_name)
+    config.store.remove({location_name}, image_path)
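
The structure of garbage_collect_storage is the reusable part: decide and delete everything relational inside one transaction, but snapshot the blob paths first and only touch blob storage after the transaction ends. A crash then leaves at worst unreferenced blobs, never database rows pointing at deleted blobs. A minimal standalone sketch of the pattern (the helper names are hypothetical, not from this repo):

def gc_rows_then_blobs(transaction_factory, store, candidate_ids):
  paths_to_remove = []
  with transaction_factory(db):
    orphans = find_orphaned_rows(candidate_ids)           # hypothetical query helper
    paths_to_remove = [row.blob_path for row in orphans]  # snapshot paths before deleting rows
    delete_rows(orphans)                                  # atomic with the query above
  for path in paths_to_remove:                            # outside the transaction, best-effort
    store.remove(path)
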
def get_tag_image(namespace_name, repository_name, tag_name):

@@ -30,7 +30,7 @@ class UserEventsBuilderModule(object):
    if not redis_config:
      # This is the old key name.
      redis_config = {
-       'host': app.config.get('USER_EVENTS_REDIS_HOSTNAME')
+       'host': app.config.get('USER_EVENTS_REDIS_HOSTNAME'),
      }

    user_events = UserEventBuilder(redis_config)
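
Under the hood these builders are thin wrappers over Redis pub/sub, keyed by the user/<username>/events/<event_id> scheme visible in _user_event_key below. A minimal sketch of that mechanism with redis-py (channel name and payload are illustrative):

import json
import redis

r = redis.StrictRedis(host='localhost')
channel = 'user/devtable/events/docker-cli'  # follows the key scheme; values illustrative

r.publish(channel, json.dumps({'action': 'push'}))  # publisher side

pubsub = r.pubsub()                                 # subscriber side
pubsub.subscribe(channel)
for message in pubsub.listen():
  if message['type'] == 'message':
    print json.loads(message['data'])
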
@@ -45,7 +45,7 @@ class UserEventsBuilderModule(object):

class UserEvent(object):
  """
  Defines a helper class for publishing to realtime user events
  as backed by Redis.
  """
@@ -74,7 +74,7 @@ class UserEvent(object):
    thread = threading.Thread(target=conduct)
    thread.start()


class UserEventListener(object):
  """
  Defines a helper class for subscribing to realtime user events as
@@ -90,7 +90,7 @@ class UserEventListener(object):
  @staticmethod
  def _user_event_key(username, event_id):
    return 'user/%s/events/%s' % (username, event_id)

  def event_stream(self):
    """
    Starts listening for events on the channel(s), yielding for each event