diff --git a/data/database.py b/data/database.py
index 45e45b057..fb060a041 100644
--- a/data/database.py
+++ b/data/database.py
@@ -192,7 +192,6 @@ class PermissionPrototype(BaseModel):
     )
 
 
-
 class AccessToken(BaseModel):
   friendly_name = CharField(null=True)
   code = CharField(default=random_string_generator(length=64), unique=True,
@@ -238,6 +237,23 @@ class ImageStorage(BaseModel):
   uploading = BooleanField(default=True, null=True)
 
 
+class ImageStorageTransformation(BaseModel):
+  name = CharField(index=True, unique=True)
+
+
+class DerivedImageStorage(BaseModel):
+  source = ForeignKeyField(ImageStorage, null=True, related_name='source')
+  derivative = ForeignKeyField(ImageStorage, related_name='derivative')
+  transformation = ForeignKeyField(ImageStorageTransformation)
+
+  class Meta:
+    database = db
+    read_slaves = (read_slave,)
+    indexes = (
+      (('source', 'transformation'), True),
+    )
+
+
 class ImageStorageLocation(BaseModel):
   name = CharField(unique=True, index=True)
 
@@ -422,4 +438,4 @@ all_models = [User, Repository, Image, AccessToken, Role, RepositoryPermission,
               OAuthApplication, OAuthAuthorizationCode, OAuthAccessToken, NotificationKind,
               Notification, ImageStorageLocation, ImageStoragePlacement,
               ExternalNotificationEvent, ExternalNotificationMethod, RepositoryNotification,
-              RepositoryAuthorizedEmail]
+              RepositoryAuthorizedEmail, ImageStorageTransformation, DerivedImageStorage]
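
The `(source, transformation)` entry in `Meta.indexes` is marked unique (the trailing `True`), so each source storage can hold at most one derivative per transformation; that is what lets `find_or_create_derived_storage` below treat the pair as a natural key. A minimal sketch of the constraint in raw SQL, using the stdlib's sqlite3 rather than the repo's peewee models (table and column names here are illustrative, not the real schema):

import sqlite3

conn = sqlite3.connect(':memory:')
conn.executescript('''
  CREATE TABLE imagestorage (id INTEGER PRIMARY KEY);
  CREATE TABLE transformation (id INTEGER PRIMARY KEY, name TEXT UNIQUE);
  CREATE TABLE derivedimagestorage (
    id INTEGER PRIMARY KEY,
    source_id INTEGER REFERENCES imagestorage(id),  -- nullable, like the model
    derivative_id INTEGER NOT NULL REFERENCES imagestorage(id),
    transformation_id INTEGER NOT NULL REFERENCES transformation(id));

  -- the unique (source, transformation) index from Meta.indexes above
  CREATE UNIQUE INDEX uniq_source_transformation
    ON derivedimagestorage (source_id, transformation_id);

  INSERT INTO imagestorage (id) VALUES (1), (2), (3);
  INSERT INTO transformation (id, name) VALUES (1, 'squash');
  INSERT INTO derivedimagestorage (source_id, derivative_id, transformation_id)
    VALUES (1, 2, 1);
''')

try:
  # A second 'squash' derivative of source 1 violates the unique index.
  conn.execute('INSERT INTO derivedimagestorage '
               '(source_id, derivative_id, transformation_id) VALUES (1, 3, 1)')
except sqlite3.IntegrityError as exc:
  print('second derivative rejected: %s' % exc)
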
diff --git a/data/model/legacy.py b/data/model/legacy.py
index d9b2079d8..faf9de223 100644
--- a/data/model/legacy.py
+++ b/data/model/legacy.py
@@ -70,6 +70,10 @@ class InvalidBuildTriggerException(DataModelException):
   pass
 
 
+class InvalidImageException(DataModelException):
+  pass
+
+
 class TooManyUsersException(DataModelException):
   pass
 
@@ -1055,6 +1059,14 @@ def __translate_ancestry(old_ancestry, translations, repository, username, prefe
   return '/%s/' % '/'.join(new_ids)
 
 
+def _create_storage(location_name):
+  storage = ImageStorage.create()
+  location = ImageStorageLocation.get(name=location_name)
+  ImageStoragePlacement.create(location=location, storage=storage)
+  storage.locations = {location_name}
+  return storage
+
+
 def find_create_or_link_image(docker_image_id, repository, username, translations,
                               preferred_location):
   with config.app_config['DB_TRANSACTION_FACTORY'](db):
@@ -1093,10 +1105,7 @@ def find_create_or_link_image(docker_image_id, repository, username, translation
       origin_image_id = to_copy.id
     except Image.DoesNotExist:
       logger.debug('Creating new storage for docker id: %s', docker_image_id)
-      storage = ImageStorage.create()
-      location = ImageStorageLocation.get(name=preferred_location)
-      ImageStoragePlacement.create(location=location, storage=storage)
-      storage.locations = {preferred_location}
+      storage = _create_storage(preferred_location)
 
       logger.debug('Storage locations: %s', storage.locations)
 
@@ -1114,6 +1123,43 @@ def find_create_or_link_image(docker_image_id, repository, username, translation
 
   return new_image
 
 
+def find_or_create_derived_storage(source, transformation_name, preferred_location):
+  try:
+    found = (ImageStorage
+             .select(ImageStorage, DerivedImageStorage)
+             .join(DerivedImageStorage, on=(ImageStorage.id == DerivedImageStorage.derivative))
+             .join(ImageStorageTransformation)
+             .where(DerivedImageStorage.source == source,
+                    ImageStorageTransformation.name == transformation_name)
+             .get())
+
+    found.locations = {placement.location.name for placement in found.imagestorageplacement_set}
+    return found
+  except ImageStorage.DoesNotExist:
+    logger.debug('Creating storage derived from source: %s', source.uuid)
+    trans = ImageStorageTransformation.get(name=transformation_name)
+    new_storage = _create_storage(preferred_location)
+    DerivedImageStorage.create(source=source, derivative=new_storage, transformation=trans)
+    return new_storage
+
+
+def get_storage_by_uuid(storage_uuid):
+  placements = list(ImageStoragePlacement
+                    .select(ImageStoragePlacement, ImageStorage, ImageStorageLocation)
+                    .join(ImageStorageLocation)
+                    .switch(ImageStoragePlacement)
+                    .join(ImageStorage)
+                    .where(ImageStorage.uuid == storage_uuid))
+
+  if not placements:
+    raise InvalidImageException('No storage found with uuid: %s' % storage_uuid)
+
+  found = placements[0].storage
+  found.locations = {placement.location.name for placement in placements}
+
+  return found
+
+
 def set_image_size(docker_image_id, namespace_name, repository_name, image_size):
   try:
@@ -1252,15 +1298,8 @@ def garbage_collect_repository(namespace_name, repository_name):
 
       image_to_remove.delete_instance()
 
-    if uuids_to_check_for_gc:
-      storage_to_remove = (ImageStorage
-                           .select()
-                           .join(Image, JOIN_LEFT_OUTER)
-                           .group_by(ImageStorage)
-                           .where(ImageStorage.uuid << list(uuids_to_check_for_gc))
-                           .having(fn.Count(Image.id) == 0))
-
-      for storage in storage_to_remove:
+    def remove_storages(query):
+      for storage in query:
        logger.debug('Garbage collecting image storage: %s', storage.uuid)
 
        image_path = config.store.image_path(storage.uuid)
@@ -1269,7 +1308,24 @@
          placement.delete_instance()
          config.store.remove({location_name}, image_path)
 
-        storage.delete_instance()
+        storage.delete_instance(recursive=True)
+
+    if uuids_to_check_for_gc:
+      storage_to_remove = (ImageStorage
+                           .select()
+                           .join(Image, JOIN_LEFT_OUTER)
+                           .group_by(ImageStorage)
+                           .where(ImageStorage.uuid << list(uuids_to_check_for_gc))
+                           .having(fn.Count(Image.id) == 0))
+
+      remove_storages(storage_to_remove)
+
+    # Now remove any derived image storages whose sources have been removed
+    derived_storages_to_remove = (ImageStorage
+                                  .select()
+                                  .join(DerivedImageStorage, on=(ImageStorage.id == DerivedImageStorage.derivative))
+                                  .where(DerivedImageStorage.source >> None))
+    remove_storages(derived_storages_to_remove)
 
   return len(to_remove)
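
The reworked GC makes two passes because of how peewee's recursive delete treats foreign keys: dependents whose FK is non-nullable are deleted outright, while nullable FKs (like `DerivedImageStorage.source`, declared `null=True`) are merely set to NULL. Pass one (`storage.delete_instance(recursive=True)`) cleans up `DerivedImageStorage` rows for deleted derivatives and orphans the ones whose source went away; pass two sweeps those orphans with `DerivedImageStorage.source >> None` (peewee's IS NULL operator). A minimal sketch of that behavior, assuming the peewee 2.x API this codebase uses (`related_name` became `backref` in peewee 3; model names here are illustrative):

from peewee import SqliteDatabase, Model, CharField, ForeignKeyField

db = SqliteDatabase(':memory:')


class Storage(Model):
  uuid = CharField()

  class Meta:
    database = db


class Derived(Model):
  source = ForeignKeyField(Storage, null=True, related_name='sourced')
  derivative = ForeignKeyField(Storage, related_name='derived')

  class Meta:
    database = db


db.create_tables([Storage, Derived])

src = Storage.create(uuid='source-storage')
drv = Storage.create(uuid='derived-storage')
Derived.create(source=src, derivative=drv)

# recursive=True nulls out the nullable Derived.source rather than deleting the row.
src.delete_instance(recursive=True)

# This is the orphan that the second GC pass finds via source >> None.
print(Derived.select().where(Derived.source >> None).count())  # 1
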
diff --git a/endpoints/verbs.py b/endpoints/verbs.py
index 1985f47d4..92d523537 100644
--- a/endpoints/verbs.py
+++ b/endpoints/verbs.py
@@ -1,26 +1,24 @@
 import logging
 import json
-import hashlib
 
-from flask import (make_response, request, session, Response, redirect,
-                   Blueprint, abort, send_file, make_response)
+from flask import redirect, Blueprint, abort, send_file
 
 from app import storage as store, app
 from auth.auth import process_auth
 from auth.permissions import ReadRepositoryPermission
 from data import model
-from endpoints.registry import set_cache_headers
+from data import database
 from util.queuefile import QueueFile
 from util.queueprocess import QueueProcess
 from util.gzipwrap import GzipWrap
 from util.streamlayerformat import StreamLayerMerger
-from werkzeug.wsgi import wrap_file
 
 verbs = Blueprint('verbs', __name__)
 logger = logging.getLogger(__name__)
 
+
 def _open_stream(namespace, repository, image_list):
   def get_next_layer():
     for current_image_id in image_list:
@@ -32,20 +30,25 @@ def _open_stream(namespace, repository, image_list):
       logger.debug('Returning image layer %s: %s' % (current_image_id, current_image_path))
       yield current_image_stream
 
+  database.configure(app.config)
   stream = GzipWrap(StreamLayerMerger(get_next_layer).get_generator())
   return stream.read
 
-def _write_synthetic_image_to_storage(namespace, repository, locations,
-                                      synthetic_image_id, queue_file):
-  # TODO: make sure this synthetic image expires!
-  image_path = store.image_layer_path(synthetic_image_id)
-  store.stream_write(locations, image_path, queue_file)
+
+def _write_synthetic_image_to_storage(linked_storage_uuid, linked_locations, queue_file):
+  image_path = store.image_layer_path(linked_storage_uuid)
+  store.stream_write(linked_locations, image_path, queue_file)
   queue_file.close()
 
+  database.configure(app.config)
+  done_uploading = model.get_storage_by_uuid(linked_storage_uuid)
+  done_uploading.uploading = False
+  done_uploading.save()
+
+
 @verbs.route('/<namespace>/<repository>/<tag>/squash', methods=['GET'])
 @process_auth
-@set_cache_headers
-def get_squashed_tag(namespace, repository, tag, headers):
+def get_squashed_tag(namespace, repository, tag):
   permission = ReadRepositoryPermission(namespace, repository)
   if permission.can() or model.repository_is_public(namespace, repository):
     # Lookup the requested tag.
@@ -57,34 +60,26 @@ def get_squashed_tag(namespace, repository, tag, headers):
     repo_image = model.get_repo_image(namespace, repository, tag_image.docker_image_id)
     if not repo_image:
       abort(404)
-
-    # Calculate a synthetic image ID by hashing the *image storage ID* with our
-    # secret. This is done to prevent the ID being guessable/overwritable by
-    # external pushes.
-    unhashed = str(repo_image.storage.id) + ':' + app.config['SECRET_KEY']
-    synthetic_image_id = hashlib.sha256(unhashed).hexdigest()
 
-    # Check to see if the synthetic image ID exists in storage. If so, we just return a 302.
-    logger.debug('Looking up synthetic image %s', synthetic_image_id)
-
-    locations = repo_image.storage.locations
-    saved_image_path = store.image_layer_path(synthetic_image_id)
-    if store.exists(locations, saved_image_path):
-      logger.debug('Synthetic image %s exists in storage', synthetic_image_id)
-      download_url = store.get_direct_download_url(locations, saved_image_path)
+    derived = model.find_or_create_derived_storage(repo_image.storage, 'squash',
+                                                   store.preferred_locations[0])
+    if not derived.uploading:
+      logger.debug('Derived image %s exists in storage', derived.uuid)
+      derived_layer_path = store.image_layer_path(derived.uuid)
+      download_url = store.get_direct_download_url(derived.locations, derived_layer_path)
       if download_url:
-        logger.debug('Redirecting to download URL for synthetic image %s', synthetic_image_id)
-        return redirect(download_url, code=302)
+        logger.debug('Redirecting to download URL for derived image %s', derived.uuid)
+        return redirect(download_url)
 
-      logger.debug('Sending cached synthetic image %s', synthetic_image_id)
-      return send_file(store.stream_read_file(locations, saved_image_path))
+      logger.debug('Sending cached derived image %s', derived.uuid)
+      return send_file(store.stream_read_file(derived.locations, derived_layer_path))
 
     # Load the ancestry for the image.
-    logger.debug('Building and returning synthetic image %s', synthetic_image_id)
+    logger.debug('Building and returning derived image %s', derived.uuid)
     uuid = repo_image.storage.uuid
     ancestry_data = store.get_content(repo_image.storage.locations, store.image_ancestry_path(uuid))
     full_image_list = json.loads(ancestry_data)
-
+
     # Create a queue process to generate the data. The queue files will read from the process
     # and send the results to the client and storage.
     args = (namespace, repository, full_image_list)
@@ -92,12 +87,12 @@
 
     client_queue_file = QueueFile(queue_process.create_queue(), 'client')
     storage_queue_file = QueueFile(queue_process.create_queue(), 'storage')
-
+
     # Start building.
     queue_process.run()
 
     # Start the storage saving.
-    storage_args = (namespace, repository, locations, synthetic_image_id, storage_queue_file)
+    storage_args = (derived.uuid, derived.locations, storage_queue_file)
     QueueProcess.run_process(_write_synthetic_image_to_storage, storage_args)
 
     # Return the client's data.
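
The verb now builds a squashed image at most once per request: `_open_stream` yields the gzipped, merged layer chunk by chunk, and `QueueProcess`/`QueueFile` fan every chunk out to two consumers, the HTTP response and `_write_synthetic_image_to_storage`, which flips the derived storage's `uploading` flag to False only after the copy has been fully written, so a concurrent request that still sees `uploading == True` rebuilds rather than serving a partial layer. A rough in-process analogue of that fan-out using only the stdlib (the repo's helpers do the same across a process boundary):

import queue
import threading


def produce(chunks, sinks):
  # Mirror every chunk to each consumer queue, then signal end-of-stream.
  for chunk in chunks:
    for sink in sinks:
      sink.put(chunk)
  for sink in sinks:
    sink.put(None)


def drain(q, label):
  total = 0
  while True:
    chunk = q.get()
    if chunk is None:
      break
    total += len(chunk)
  print('%s consumer received %d bytes' % (label, total))


client_q, storage_q = queue.Queue(), queue.Queue()
consumers = [threading.Thread(target=drain, args=(client_q, 'client')),
             threading.Thread(target=drain, args=(storage_q, 'storage'))]
for consumer in consumers:
  consumer.start()

produce([b'layer-1', b'layer-2', b'layer-3'], [client_q, storage_q])
for consumer in consumers:
  consumer.join()
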
diff --git a/initdb.py b/initdb.py
index 34b1c0a08..26ef28678 100644
--- a/initdb.py
+++ b/initdb.py
@@ -243,6 +243,8 @@ def initialize_database():
   ImageStorageLocation.create(name='local_eu')
   ImageStorageLocation.create(name='local_us')
 
+  ImageStorageTransformation.create(name='squash')
+
   # NOTE: These MUST be copied over to NotificationKind, since every external
   # notification can also generate a Quay.io notification.
   ExternalNotificationEvent.create(name='repo_push')
diff --git a/requirements-nover.txt b/requirements-nover.txt
index a3c74e89b..262e0594d 100644
--- a/requirements-nover.txt
+++ b/requirements-nover.txt
@@ -36,3 +36,4 @@ psycopg2
 pyyaml
 git+https://github.com/DevTable/aniso8601-fake.git
 git+https://github.com/DevTable/anunidecode.git
+gipc
\ No newline at end of file
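
The new gipc dependency is presumably for `QueueProcess`: spawning workers with plain multiprocessing from inside a gevent-driven app is unreliable because the child inherits the parent's event loop state, and gipc provides gevent-cooperative process spawning and pipes. A minimal sketch of its documented pipe/start_process API (not code from this repo; note the requirements line pins no version):

import gipc


def child(reader):
  # Runs in a forked child whose gevent state gipc has reset.
  print('child received: %s' % reader.get())


if __name__ == '__main__':
  with gipc.pipe() as (reader, writer):
    proc = gipc.start_process(target=child, args=(reader,))
    writer.put('chunk for the worker')
    proc.join()
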