From 1450b7e84cab6037e7483aa07f28527e20fd94a5 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Tue, 18 Aug 2015 11:53:48 -0400 Subject: [PATCH] Fix verbs support in V2 --- TODO.md | 5 ----- data/model/image.py | 42 ++++++++++++++++++++++++++---------- data/model/storage.py | 3 ++- endpoints/v1/registry.py | 8 +++---- endpoints/v2/manifest.py | 2 +- endpoints/verbs.py | 27 ++++++++++------------- formats/aci.py | 2 +- formats/squashed.py | 12 +++++++++-- formats/tarimageformatter.py | 7 +++--- 9 files changed, 64 insertions(+), 44 deletions(-) diff --git a/TODO.md b/TODO.md index 53fbdc5b2..0cb79709b 100644 --- a/TODO.md +++ b/TODO.md @@ -1,15 +1,10 @@ -- Convert the flattened image generator to use the database ancestry instead of the json file -- Convert verbs to load json from either db or storage -- Convert verbs to work with v1 and cas layer storage locations - Fix all tests - Fix uncompressed size backfill - File issue to move queries out of uncompressed size backfill and use subquery random - Consider removing the new jwest dependency - Update the max fresh on registry tokens, 300s is not long enough to complete all registry actions -- Fix the sizes stored in the db - Make sure we handle more of the v2 api than just what is required to push and pull - Handle registry API error conditions - Fill in the registry v2 methods on other storage engines - Write a script to backfill the json metadata - Verify the manifest, and throw the proper error if unverified -- Convert uploads to get locked to a placement, e.g. 
once an upload starts, all communication goes through that replica diff --git a/data/model/image.py b/data/model/image.py index 5bd70af91..be1d5b589 100644 --- a/data/model/image.py +++ b/data/model/image.py @@ -5,7 +5,7 @@ from peewee import JOIN_LEFT_OUTER, fn from datetime import datetime from data.model import (DataModelException, db_transaction, _basequery, storage, - InvalidImageException) + InvalidImageException, config) from data.database import (Image, Repository, ImageStoragePlacement, Namespace, ImageStorage, ImageStorageLocation, RepositoryPermission, db_for_update) @@ -79,7 +79,10 @@ def get_repository_images_base(namespace_name, repository_name, query_modifier): .where(Repository.name == repository_name, Namespace.username == namespace_name)) query = query_modifier(query) + return _translate_placements_to_images_with_locations(query) + +def _translate_placements_to_images_with_locations(query): location_list = list(query) images = {} @@ -113,7 +116,7 @@ def lookup_repository_images(namespace_name, repository_name, docker_image_ids): def get_matching_repository_images(namespace_name, repository_name, docker_image_ids): def modify_query(query): - return query.where(Image.docker_image_id << docker_image_ids) + return query.where(Image.docker_image_id << list(docker_image_ids)) return get_repository_images_base(namespace_name, repository_name, modify_query) @@ -360,25 +363,42 @@ def get_repo_image_by_storage_checksum(namespace, repository_name, storage_check raise InvalidImageException(msg) -def get_image_json(image, store): - """ Returns the JSON definition data for this image. """ +def has_image_json(image): + """ Returns whether there exists JSON definition data for the image. """ + if image.v1_json_metadata: + return bool(image.v1_json_metadata) + + store = config.store + return store.exists(image.storage.locations, store.image_json_path(image.storage.uuid)) + + +def get_image_json(image): + """ Returns the JSON definition data for the image. 
""" if image.v1_json_metadata: return image.v1_json_metadata + store = config.store return store.get_content(image.storage.locations, store.image_json_path(image.storage.uuid)) -def get_image_ancestors(image, include_image=True): - """ Returns a query of the full ancestors of an image, including itself. """ +def get_image_layers(image): + """ Returns a list of the full layers of an image, including itself (if specified), sorted + from base image outward. """ ancestors = image.ancestors.split('/')[1:-1] image_ids = [ancestor_id for ancestor_id in ancestors if ancestor_id] - if include_image: - image_ids.append(image.id) + image_ids.append(str(image.id)) - if not image_ids: - return [] + query = (ImageStoragePlacement + .select(ImageStoragePlacement, Image, ImageStorage, ImageStorageLocation) + .join(ImageStorageLocation) + .switch(ImageStoragePlacement) + .join(ImageStorage, JOIN_LEFT_OUTER) + .join(Image) + .where(Image.id << image_ids)) - return Image.select().where(Image.id << image_ids) + image_list = list(_translate_placements_to_images_with_locations(query)) + image_list.sort(key=lambda image: image_ids.index(str(image.id))) + return image_list def synthesize_v1_image(namespace, repository_name, storage_checksum, docker_image_id, diff --git a/data/model/storage.py b/data/model/storage.py index 9072b656b..c697c5de8 100644 --- a/data/model/storage.py +++ b/data/model/storage.py @@ -218,8 +218,9 @@ def get_repo_storage_by_checksum(namespace, repository_name, checksum): raise InvalidImageException('No storage found with checksum {0}'.format(checksum)) -def get_layer_path(storage_record, store): +def get_layer_path(storage_record): """ Returns the path in the storage engine to the layer data referenced by the storage row. 
""" + store = config.store if not storage_record.cas_path: return store.v1_image_layer_path(storage_record.uuid) diff --git a/endpoints/v1/registry.py b/endpoints/v1/registry.py index 24afd3525..42e63dc66 100644 --- a/endpoints/v1/registry.py +++ b/endpoints/v1/registry.py @@ -168,7 +168,7 @@ def put_image_layer(namespace, repository, image_id): repo_image = model.image.get_repo_image_extended(namespace, repository, image_id) try: logger.debug('Retrieving image data') - json_data = model.image.get_image_json(repo_image, store) + json_data = model.image.get_image_json(repo_image) except (IOError, AttributeError): logger.exception('Exception when retrieving image data') abort(404, 'Image %(image_id)s not found', issue='unknown-image', @@ -296,7 +296,7 @@ def put_image_checksum(namespace, repository, image_id): abort(404, 'Image not found: %(image_id)s', issue='unknown-image', image_id=image_id) logger.debug('Looking up repo layer data') - if not model.image.get_image_json(repo_image, store): + if not model.image.has_image_json(repo_image): abort(404, 'Image not found: %(image_id)s', issue='unknown-image', image_id=image_id) logger.debug('Marking image path') @@ -349,7 +349,7 @@ def get_image_json(namespace, repository, image_id, headers): logger.debug('Looking up repo layer data') try: - data = repo_image.get_image_json(repo_image, store) + data = repo_image.get_image_json(repo_image) except (IOError, AttributeError): flask_abort(404) @@ -463,7 +463,7 @@ def put_image_json(namespace, repository, image_id): abort(400, 'Image %(image_id)s depends on non existing parent image %(parent_id)s', issue='invalid-request', image_id=image_id, parent_id=parent_id) - if not image_is_uploading(repo_image) and model.image.get_image_json(repo_image, store): + if not image_is_uploading(repo_image) and model.image.has_image_json(repo_image): exact_abort(409, 'Image already exists') set_uploading_flag(repo_image, True) diff --git a/endpoints/v2/manifest.py b/endpoints/v2/manifest.py 
index 9db160540..fcc7cd3d8 100644 --- a/endpoints/v2/manifest.py +++ b/endpoints/v2/manifest.py @@ -334,7 +334,7 @@ def __get_and_backfill_image_metadata(image): if image_metadata is None: logger.warning('Loading metadata from storage for image id: %s', image.id) - image.v1_json_metadata = model.image.get_image_json(image, storage) + image.v1_json_metadata = model.image.get_image_json(image) logger.info('Saving backfilled metadata for image id: %s', image.id) image.save() diff --git a/endpoints/verbs.py b/endpoints/verbs.py index e70afd051..9254c9d5b 100644 --- a/endpoints/verbs.py +++ b/endpoints/verbs.py @@ -23,16 +23,11 @@ logger = logging.getLogger(__name__) def _open_stream(formatter, namespace, repository, tag, synthetic_image_id, image_json, - image_id_list): + image_list): store = Storage(app) - # For performance reasons, we load the full image list here, cache it, then disconnect from - # the database. - with database.UseThenDisconnect(app.config): - image_list = list(model.image.get_matching_repository_images(namespace, repository, - image_id_list)) - - image_list.sort(key=lambda image: image_id_list.index(image.docker_image_id)) + def get_image_json(image): + return json.loads(model.image.get_image_json(image)) def get_next_image(): for current_image in image_list: @@ -40,7 +35,7 @@ def _open_stream(formatter, namespace, repository, tag, synthetic_image_id, imag def get_next_layer(): for current_image_entry in image_list: - current_image_path = store.image_layer_path(current_image_entry.storage.uuid) + current_image_path = model.storage.get_layer_path(current_image_entry.storage) current_image_stream = store.stream_read_file(current_image_entry.storage.locations, current_image_path) @@ -49,7 +44,7 @@ def _open_stream(formatter, namespace, repository, tag, synthetic_image_id, imag yield current_image_stream stream = formatter.build_stream(namespace, repository, tag, synthetic_image_id, image_json, - get_next_image, get_next_layer) + get_next_image, 
get_next_layer, get_image_json) return stream.read @@ -88,7 +83,7 @@ def _write_synthetic_image_to_storage(verb, linked_storage_uuid, linked_location queue_file.add_exception_handler(handle_exception) - image_path = store.image_layer_path(linked_storage_uuid) + image_path = store.v1_image_layer_path(linked_storage_uuid) store.stream_write(linked_locations, image_path, queue_file) queue_file.close() @@ -122,7 +117,7 @@ def _verify_repo_verb(store, namespace, repository, tag, verb, checker=None): image_json = None if checker is not None: - image_json = json.loads(model.image.get_image_json(repo_image, store)) + image_json = json.loads(model.image.get_image_json(repo_image)) if not checker(image_json): logger.debug('Check mismatch on %s/%s:%s, verb %s', namespace, repository, tag, verb) abort(404) @@ -169,7 +164,7 @@ def _repo_verb(namespace, repository, tag, verb, formatter, sign=False, checker= if not derived.uploading: logger.debug('Derived %s image %s exists in storage', verb, derived.uuid) - derived_layer_path = model.storage.get_layer_path(derived, store) + derived_layer_path = model.storage.get_layer_path(derived) download_url = store.get_direct_download_url(derived.locations, derived_layer_path) if download_url: logger.debug('Redirecting to download URL for derived %s image %s', verb, derived.uuid) @@ -181,14 +176,14 @@ def _repo_verb(namespace, repository, tag, verb, formatter, sign=False, checker= logger.debug('Sending cached derived %s image %s', verb, derived.uuid) return send_file(store.stream_read_file(derived.locations, derived_layer_path)) - # Load the ancestry for the image. - full_image_list = model.image.get_image_ancestors(repo_image) + # Load the full image list for the image. + full_image_list = model.image.get_image_layers(repo_image) logger.debug('Building and returning derived %s image %s', verb, derived.uuid) # Load the image's JSON layer. 
if not image_json: - image_json = json.loads(model.image.get_image_json(repo_image, store)) + image_json = json.loads(model.image.get_image_json(repo_image)) # Calculate a synthetic image ID. synthetic_image_id = hashlib.sha256(tag_image.docker_image_id + ':' + verb).hexdigest() diff --git a/formats/aci.py b/formats/aci.py index 718c35445..11a7a06ef 100644 --- a/formats/aci.py +++ b/formats/aci.py @@ -10,7 +10,7 @@ class ACIImage(TarImageFormatter): """ def stream_generator(self, namespace, repository, tag, synthetic_image_id, - layer_json, get_image_iterator, get_layer_iterator): + layer_json, get_image_iterator, get_layer_iterator, get_image_json): # ACI Format (.tar): # manifest - The JSON manifest # rootfs - The root file system diff --git a/formats/squashed.py b/formats/squashed.py index b26a069fd..8c567d583 100644 --- a/formats/squashed.py +++ b/formats/squashed.py @@ -20,7 +20,8 @@ class SquashedDockerImage(TarImageFormatter): """ def stream_generator(self, namespace, repository, tag, synthetic_image_id, - layer_json, get_image_iterator, get_layer_iterator): + layer_json, get_image_iterator, get_layer_iterator, get_image_json): + # Docker import V1 Format (.tar): # repositories - JSON file containing a repo -> tag -> image map # {image ID folder}: @@ -52,7 +53,14 @@ class SquashedDockerImage(TarImageFormatter): # Yield the merged layer data's header. estimated_file_size = 0 for image in get_image_iterator(): - estimated_file_size += image.storage.uncompressed_size + # In V1 we have the actual uncompressed size, which is needed for back compat with + # older versions of Docker. + # In V2, we use the size given in the image JSON. 
+ if image.storage.uncompressed_size: + estimated_file_size += image.storage.uncompressed_size + else: + image_json = get_image_json(image) + estimated_file_size += image_json.get('Size', 0) yield self.tar_file_header(synthetic_image_id + '/layer.tar', estimated_file_size) diff --git a/formats/tarimageformatter.py b/formats/tarimageformatter.py index 361f6256d..38d3fb3ab 100644 --- a/formats/tarimageformatter.py +++ b/formats/tarimageformatter.py @@ -5,16 +5,17 @@ class TarImageFormatter(object): """ Base class for classes which produce a TAR containing image and layer data. """ def build_stream(self, namespace, repository, tag, synthetic_image_id, layer_json, - get_image_iterator, get_layer_iterator): + get_image_iterator, get_layer_iterator, get_image_json): """ Builds and streams a synthetic .tar.gz that represents the formatted TAR created by this class's implementation. """ return GzipWrap(self.stream_generator(namespace, repository, tag, synthetic_image_id, layer_json, - get_image_iterator, get_layer_iterator)) + get_image_iterator, get_layer_iterator, + get_image_json)) def stream_generator(self, namespace, repository, tag, synthetic_image_id, - layer_json, get_image_iterator, get_layer_iterator): + layer_json, get_image_iterator, get_layer_iterator, get_image_json): raise NotImplementedError def tar_file(self, name, contents):