Merge remote-tracking branch 'origin/master' into nomenclature

Conflicts:
	endpoints/common.py
	endpoints/notificationhelper.py
	test/data/test.db
	workers/dockerfilebuild.py
This commit is contained in:
Jake Moshenko 2014-10-23 13:25:37 -04:00
commit 1461310ab8
200 changed files with 240935 additions and 798 deletions

View file

@ -13,7 +13,8 @@ from data.database import (User, Repository, Image, AccessToken, Role, Repositor
Notification, ImageStorageLocation, ImageStoragePlacement,
ExternalNotificationEvent, ExternalNotificationMethod,
RepositoryNotification, RepositoryAuthorizedEmail, TeamMemberInvite,
DerivedImageStorage, random_string_generator, db, BUILD_PHASE)
DerivedImageStorage, ImageStorageTransformation, random_string_generator,
db, BUILD_PHASE)
from peewee import JOIN_LEFT_OUTER, fn
from util.validation import (validate_username, validate_email, validate_password,
INVALID_PASSWORD_MESSAGE)
@ -578,6 +579,13 @@ def get_user(username):
return None
def get_namespace_user(username):
try:
return User.get(User.username == username)
except User.DoesNotExist:
return None
def get_user_or_org(username):
try:
return User.get(User.username == username, User.robot == False)
@ -1247,6 +1255,20 @@ def find_or_create_derived_storage(source, transformation_name, preferred_locati
return new_storage
def delete_derived_storage_by_uuid(storage_uuid):
try:
image_storage = get_storage_by_uuid(storage_uuid)
except InvalidImageException:
return
try:
DerivedImageStorage.get(derivative=image_storage)
except DerivedImageStorage.DoesNotExist:
return
image_storage.delete_instance(recursive=True)
def get_storage_by_uuid(storage_uuid):
placements = list(ImageStoragePlacement
.select(ImageStoragePlacement, ImageStorage, ImageStorageLocation)
@ -1305,7 +1327,15 @@ def set_image_metadata(docker_image_id, namespace_name, repository_name, created
# We cleanup any old checksum in case it's a retry after a fail
fetched.storage.checksum = None
fetched.storage.created = dateutil.parser.parse(created_date_str).replace(tzinfo=None)
fetched.storage.created = datetime.now()
if created_date_str is not None:
try:
fetched.storage.created = dateutil.parser.parse(created_date_str).replace(tzinfo=None)
except:
# parse raises different exceptions, so we cannot use a specific kind of handler here.
pass
fetched.storage.comment = comment
fetched.storage.command = command
@ -1390,48 +1420,87 @@ def garbage_collect_repository(namespace_name, repository_name):
all_images = {int(img.id): img for img in all_repo_images}
to_remove = set(all_images.keys()).difference(referenced_anscestors)
logger.info('Cleaning up unreferenced images: %s', to_remove)
if len(to_remove) > 0:
logger.info('Cleaning up unreferenced images: %s', to_remove)
storage_id_whitelist = {all_images[to_remove_id].storage.id for to_remove_id in to_remove}
uuids_to_check_for_gc = set()
for image_id_to_remove in to_remove:
image_to_remove = all_images[image_id_to_remove]
Image.delete().where(Image.id << list(to_remove)).execute()
logger.debug('Adding image storage to the gc list: %s',
image_to_remove.storage.uuid)
uuids_to_check_for_gc.add(image_to_remove.storage.uuid)
garbage_collect_storage(storage_id_whitelist)
image_to_remove.delete_instance()
return len(to_remove)
def remove_storages(query):
for storage in query:
logger.debug('Garbage collecting image storage: %s', storage.uuid)
image_path = config.store.image_path(storage.uuid)
for placement in storage.imagestorageplacement_set:
location_name = placement.location.name
placement.delete_instance()
config.store.remove({location_name}, image_path)
def garbage_collect_storage(storage_id_whitelist):
# We are going to make the conscious decision to not delete image storage inside the transaction
# This may end up producing garbage in s3, trading off for higher availability in the database
def placements_query_to_paths_set(placements_query):
return {(placement.location.name, config.store.image_path(placement.storage.uuid))
for placement in placements_query}
storage.delete_instance(recursive=True)
def orphaned_storage_query(select_base_query, candidates):
return (select_base_query
.switch(ImageStorage)
.join(Image, JOIN_LEFT_OUTER)
.switch(ImageStorage)
.join(DerivedImageStorage, JOIN_LEFT_OUTER,
on=(ImageStorage.id == DerivedImageStorage.derivative))
.where(ImageStorage.id << list(candidates))
.group_by(ImageStorage)
.having((fn.Count(Image.id) == 0) & (fn.Count(DerivedImageStorage.id) == 0)))
if uuids_to_check_for_gc:
storage_to_remove = (ImageStorage
.select()
.join(Image, JOIN_LEFT_OUTER)
.group_by(ImageStorage)
.where(ImageStorage.uuid << list(uuids_to_check_for_gc))
.having(fn.Count(Image.id) == 0))
logger.debug('Garbage collecting storage from candidates: %s', storage_id_whitelist)
with config.app_config['DB_TRANSACTION_FACTORY'](db):
# Find out which derived storages will be removed, and add them to the whitelist
orphaned_from_candidates = list(orphaned_storage_query(ImageStorage.select(ImageStorage.id),
storage_id_whitelist))
remove_storages(storage_to_remove)
if len(orphaned_from_candidates) > 0:
derived_to_remove = (ImageStorage
.select(ImageStorage.id)
.join(DerivedImageStorage,
on=(ImageStorage.id == DerivedImageStorage.derivative))
.where(DerivedImageStorage.source << orphaned_from_candidates))
storage_id_whitelist.update({derived.id for derived in derived_to_remove})
# Now remove any derived image storages whose sources have been removed
derived_storages_to_remove = (ImageStorage
.select()
.join(DerivedImageStorage, on=(ImageStorage.id == DerivedImageStorage.derivative))
.where(DerivedImageStorage.source >> None))
remove_storages(derived_storages_to_remove)
# Remove the dervived image storages with sources of orphaned storages
(DerivedImageStorage
.delete()
.where(DerivedImageStorage.source << orphaned_from_candidates)
.execute())
return len(to_remove)
# Track all of the data that should be removed from blob storage
placements_to_remove = orphaned_storage_query(ImageStoragePlacement
.select(ImageStoragePlacement,
ImageStorage,
ImageStorageLocation)
.join(ImageStorageLocation)
.switch(ImageStoragePlacement)
.join(ImageStorage),
storage_id_whitelist)
paths_to_remove = placements_query_to_paths_set(placements_to_remove.clone())
# Remove the placements for orphaned storages
placements_subquery = list(placements_to_remove.clone().select(ImageStoragePlacement.id))
if len(placements_subquery) > 0:
(ImageStoragePlacement
.delete()
.where(ImageStoragePlacement.id << list(placements_subquery))
.execute())
# Remove the all orphaned storages
orphaned_storages = list(orphaned_storage_query(ImageStorage.select(ImageStorage.id),
storage_id_whitelist))
if len(orphaned_storages) > 0:
(ImageStorage
.delete()
.where(ImageStorage.id << orphaned_storages)
.execute())
# Delete the actual blob storage
for location_name, image_path in paths_to_remove:
logger.debug('Removing %s from %s', image_path, location_name)
config.store.remove({location_name}, image_path)
def get_tag_image(namespace_name, repository_name, tag_name):