Fix verbs support in V2

This commit is contained in:
Joseph Schorr 2015-08-18 11:53:48 -04:00
parent cf030e2a98
commit 1450b7e84c
9 changed files with 64 additions and 44 deletions

View file

@ -1,15 +1,10 @@
- Convert the flattened image generator to use the database ancestry instead of the json file
- Convert verbs to load json from either db or storage
- Convert verbs to work with v1 and cas layer storage locations
- Fix all tests - Fix all tests
- Fix uncompressed size backfill - Fix uncompressed size backfill
- File issue to move queries out of uncompressed size backfill and use subquery random - File issue to move queries out of uncompressed size backfill and use subquery random
- Consider removing the new jwest dependency - Consider removing the new jwest dependency
- Update the max fresh on registry tokens, 300s is not long enough to complete all registry actions - Update the max fresh on registry tokens, 300s is not long enough to complete all registry actions
- Fix the sizes stored in the db
- Make sure we handle more of the v2 api than just what is required to push and pull - Make sure we handle more of the v2 api than just what is required to push and pull
- Handle registry API error conditions - Handle registry API error conditions
- Fill in the registry v2 methods on other storage engines - Fill in the registry v2 methods on other storage engines
- Write a script to backfill the json metadata - Write a script to backfill the json metadata
- Verify the manifest, and throw the proper error if unverified - Verify the manifest, and throw the proper error if unverified
- Convert uploads to get locked to a placement, i.e. once an upload starts, all communication goes through that replica

View file

@ -5,7 +5,7 @@ from peewee import JOIN_LEFT_OUTER, fn
from datetime import datetime from datetime import datetime
from data.model import (DataModelException, db_transaction, _basequery, storage, from data.model import (DataModelException, db_transaction, _basequery, storage,
InvalidImageException) InvalidImageException, config)
from data.database import (Image, Repository, ImageStoragePlacement, Namespace, ImageStorage, from data.database import (Image, Repository, ImageStoragePlacement, Namespace, ImageStorage,
ImageStorageLocation, RepositoryPermission, db_for_update) ImageStorageLocation, RepositoryPermission, db_for_update)
@ -79,7 +79,10 @@ def get_repository_images_base(namespace_name, repository_name, query_modifier):
.where(Repository.name == repository_name, Namespace.username == namespace_name)) .where(Repository.name == repository_name, Namespace.username == namespace_name))
query = query_modifier(query) query = query_modifier(query)
return _translate_placements_to_images_with_locations(query)
def _translate_placements_to_images_with_locations(query):
location_list = list(query) location_list = list(query)
images = {} images = {}
@ -113,7 +116,7 @@ def lookup_repository_images(namespace_name, repository_name, docker_image_ids):
def get_matching_repository_images(namespace_name, repository_name, docker_image_ids): def get_matching_repository_images(namespace_name, repository_name, docker_image_ids):
def modify_query(query): def modify_query(query):
return query.where(Image.docker_image_id << docker_image_ids) return query.where(Image.docker_image_id << list(docker_image_ids))
return get_repository_images_base(namespace_name, repository_name, modify_query) return get_repository_images_base(namespace_name, repository_name, modify_query)
@ -360,25 +363,42 @@ def get_repo_image_by_storage_checksum(namespace, repository_name, storage_check
raise InvalidImageException(msg) raise InvalidImageException(msg)
def get_image_json(image, store): def has_image_json(image):
""" Returns the JSON definition data for this image. """ """ Returns whether JSON definition data exists for the image. """
if image.v1_json_metadata:
return bool(image.v1_json_metadata)
store = config.store
return store.exists(image.storage.locations, store.image_json_path(image.storage.uuid))
def get_image_json(image):
""" Returns the JSON definition data for the image. """
if image.v1_json_metadata: if image.v1_json_metadata:
return image.v1_json_metadata return image.v1_json_metadata
store = config.store
return store.get_content(image.storage.locations, store.image_json_path(image.storage.uuid)) return store.get_content(image.storage.locations, store.image_json_path(image.storage.uuid))
def get_image_ancestors(image, include_image=True): def get_image_layers(image):
""" Returns a query of the full ancestors of an image, including itself. """ """ Returns a list of the full layers of an image, including itself, sorted
from base image outward. """
ancestors = image.ancestors.split('/')[1:-1] ancestors = image.ancestors.split('/')[1:-1]
image_ids = [ancestor_id for ancestor_id in ancestors if ancestor_id] image_ids = [ancestor_id for ancestor_id in ancestors if ancestor_id]
if include_image: image_ids.append(str(image.id))
image_ids.append(image.id)
if not image_ids: query = (ImageStoragePlacement
return [] .select(ImageStoragePlacement, Image, ImageStorage, ImageStorageLocation)
.join(ImageStorageLocation)
.switch(ImageStoragePlacement)
.join(ImageStorage, JOIN_LEFT_OUTER)
.join(Image)
.where(Image.id << image_ids))
return Image.select().where(Image.id << image_ids) image_list = list(_translate_placements_to_images_with_locations(query))
image_list.sort(key=lambda image: image_ids.index(str(image.id)))
return image_list
def synthesize_v1_image(namespace, repository_name, storage_checksum, docker_image_id, def synthesize_v1_image(namespace, repository_name, storage_checksum, docker_image_id,

View file

@ -218,8 +218,9 @@ def get_repo_storage_by_checksum(namespace, repository_name, checksum):
raise InvalidImageException('No storage found with checksum {0}'.format(checksum)) raise InvalidImageException('No storage found with checksum {0}'.format(checksum))
def get_layer_path(storage_record, store): def get_layer_path(storage_record):
""" Returns the path in the storage engine to the layer data referenced by the storage row. """ """ Returns the path in the storage engine to the layer data referenced by the storage row. """
store = config.store
if not storage_record.cas_path: if not storage_record.cas_path:
return store.v1_image_layer_path(storage_record.uuid) return store.v1_image_layer_path(storage_record.uuid)

View file

@ -168,7 +168,7 @@ def put_image_layer(namespace, repository, image_id):
repo_image = model.image.get_repo_image_extended(namespace, repository, image_id) repo_image = model.image.get_repo_image_extended(namespace, repository, image_id)
try: try:
logger.debug('Retrieving image data') logger.debug('Retrieving image data')
json_data = model.image.get_image_json(repo_image, store) json_data = model.image.get_image_json(repo_image)
except (IOError, AttributeError): except (IOError, AttributeError):
logger.exception('Exception when retrieving image data') logger.exception('Exception when retrieving image data')
abort(404, 'Image %(image_id)s not found', issue='unknown-image', abort(404, 'Image %(image_id)s not found', issue='unknown-image',
@ -296,7 +296,7 @@ def put_image_checksum(namespace, repository, image_id):
abort(404, 'Image not found: %(image_id)s', issue='unknown-image', image_id=image_id) abort(404, 'Image not found: %(image_id)s', issue='unknown-image', image_id=image_id)
logger.debug('Looking up repo layer data') logger.debug('Looking up repo layer data')
if not model.image.get_image_json(repo_image, store): if not model.image.has_image_json(repo_image):
abort(404, 'Image not found: %(image_id)s', issue='unknown-image', image_id=image_id) abort(404, 'Image not found: %(image_id)s', issue='unknown-image', image_id=image_id)
logger.debug('Marking image path') logger.debug('Marking image path')
@ -349,7 +349,7 @@ def get_image_json(namespace, repository, image_id, headers):
logger.debug('Looking up repo layer data') logger.debug('Looking up repo layer data')
try: try:
data = repo_image.get_image_json(repo_image, store) data = repo_image.get_image_json(repo_image)
except (IOError, AttributeError): except (IOError, AttributeError):
flask_abort(404) flask_abort(404)
@ -463,7 +463,7 @@ def put_image_json(namespace, repository, image_id):
abort(400, 'Image %(image_id)s depends on non existing parent image %(parent_id)s', abort(400, 'Image %(image_id)s depends on non existing parent image %(parent_id)s',
issue='invalid-request', image_id=image_id, parent_id=parent_id) issue='invalid-request', image_id=image_id, parent_id=parent_id)
if not image_is_uploading(repo_image) and model.image.get_image_json(repo_image, store): if not image_is_uploading(repo_image) and model.image.has_image_json(repo_image):
exact_abort(409, 'Image already exists') exact_abort(409, 'Image already exists')
set_uploading_flag(repo_image, True) set_uploading_flag(repo_image, True)

View file

@ -334,7 +334,7 @@ def __get_and_backfill_image_metadata(image):
if image_metadata is None: if image_metadata is None:
logger.warning('Loading metadata from storage for image id: %s', image.id) logger.warning('Loading metadata from storage for image id: %s', image.id)
image.v1_json_metadata = model.image.get_image_json(image, storage) image.v1_json_metadata = model.image.get_image_json(image)
logger.info('Saving backfilled metadata for image id: %s', image.id) logger.info('Saving backfilled metadata for image id: %s', image.id)
image.save() image.save()

View file

@ -23,16 +23,11 @@ logger = logging.getLogger(__name__)
def _open_stream(formatter, namespace, repository, tag, synthetic_image_id, image_json, def _open_stream(formatter, namespace, repository, tag, synthetic_image_id, image_json,
image_id_list): image_list):
store = Storage(app) store = Storage(app)
# For performance reasons, we load the full image list here, cache it, then disconnect from def get_image_json(image):
# the database. return json.loads(model.image.get_image_json(image))
with database.UseThenDisconnect(app.config):
image_list = list(model.image.get_matching_repository_images(namespace, repository,
image_id_list))
image_list.sort(key=lambda image: image_id_list.index(image.docker_image_id))
def get_next_image(): def get_next_image():
for current_image in image_list: for current_image in image_list:
@ -40,7 +35,7 @@ def _open_stream(formatter, namespace, repository, tag, synthetic_image_id, imag
def get_next_layer(): def get_next_layer():
for current_image_entry in image_list: for current_image_entry in image_list:
current_image_path = store.image_layer_path(current_image_entry.storage.uuid) current_image_path = model.storage.get_layer_path(current_image_entry.storage)
current_image_stream = store.stream_read_file(current_image_entry.storage.locations, current_image_stream = store.stream_read_file(current_image_entry.storage.locations,
current_image_path) current_image_path)
@ -49,7 +44,7 @@ def _open_stream(formatter, namespace, repository, tag, synthetic_image_id, imag
yield current_image_stream yield current_image_stream
stream = formatter.build_stream(namespace, repository, tag, synthetic_image_id, image_json, stream = formatter.build_stream(namespace, repository, tag, synthetic_image_id, image_json,
get_next_image, get_next_layer) get_next_image, get_next_layer, get_image_json)
return stream.read return stream.read
@ -88,7 +83,7 @@ def _write_synthetic_image_to_storage(verb, linked_storage_uuid, linked_location
queue_file.add_exception_handler(handle_exception) queue_file.add_exception_handler(handle_exception)
image_path = store.image_layer_path(linked_storage_uuid) image_path = store.v1_image_layer_path(linked_storage_uuid)
store.stream_write(linked_locations, image_path, queue_file) store.stream_write(linked_locations, image_path, queue_file)
queue_file.close() queue_file.close()
@ -122,7 +117,7 @@ def _verify_repo_verb(store, namespace, repository, tag, verb, checker=None):
image_json = None image_json = None
if checker is not None: if checker is not None:
image_json = json.loads(model.image.get_image_json(repo_image, store)) image_json = json.loads(model.image.get_image_json(repo_image))
if not checker(image_json): if not checker(image_json):
logger.debug('Check mismatch on %s/%s:%s, verb %s', namespace, repository, tag, verb) logger.debug('Check mismatch on %s/%s:%s, verb %s', namespace, repository, tag, verb)
abort(404) abort(404)
@ -169,7 +164,7 @@ def _repo_verb(namespace, repository, tag, verb, formatter, sign=False, checker=
if not derived.uploading: if not derived.uploading:
logger.debug('Derived %s image %s exists in storage', verb, derived.uuid) logger.debug('Derived %s image %s exists in storage', verb, derived.uuid)
derived_layer_path = model.storage.get_layer_path(derived, store) derived_layer_path = model.storage.get_layer_path(derived)
download_url = store.get_direct_download_url(derived.locations, derived_layer_path) download_url = store.get_direct_download_url(derived.locations, derived_layer_path)
if download_url: if download_url:
logger.debug('Redirecting to download URL for derived %s image %s', verb, derived.uuid) logger.debug('Redirecting to download URL for derived %s image %s', verb, derived.uuid)
@ -181,14 +176,14 @@ def _repo_verb(namespace, repository, tag, verb, formatter, sign=False, checker=
logger.debug('Sending cached derived %s image %s', verb, derived.uuid) logger.debug('Sending cached derived %s image %s', verb, derived.uuid)
return send_file(store.stream_read_file(derived.locations, derived_layer_path)) return send_file(store.stream_read_file(derived.locations, derived_layer_path))
# Load the ancestry for the image. # Load the full image list for the image.
full_image_list = model.image.get_image_ancestors(repo_image) full_image_list = model.image.get_image_layers(repo_image)
logger.debug('Building and returning derived %s image %s', verb, derived.uuid) logger.debug('Building and returning derived %s image %s', verb, derived.uuid)
# Load the image's JSON layer. # Load the image's JSON layer.
if not image_json: if not image_json:
image_json = json.loads(model.image.get_image_json(repo_image, store)) image_json = json.loads(model.image.get_image_json(repo_image))
# Calculate a synthetic image ID. # Calculate a synthetic image ID.
synthetic_image_id = hashlib.sha256(tag_image.docker_image_id + ':' + verb).hexdigest() synthetic_image_id = hashlib.sha256(tag_image.docker_image_id + ':' + verb).hexdigest()

View file

@ -10,7 +10,7 @@ class ACIImage(TarImageFormatter):
""" """
def stream_generator(self, namespace, repository, tag, synthetic_image_id, def stream_generator(self, namespace, repository, tag, synthetic_image_id,
layer_json, get_image_iterator, get_layer_iterator): layer_json, get_image_iterator, get_layer_iterator, get_image_json):
# ACI Format (.tar): # ACI Format (.tar):
# manifest - The JSON manifest # manifest - The JSON manifest
# rootfs - The root file system # rootfs - The root file system

View file

@ -20,7 +20,8 @@ class SquashedDockerImage(TarImageFormatter):
""" """
def stream_generator(self, namespace, repository, tag, synthetic_image_id, def stream_generator(self, namespace, repository, tag, synthetic_image_id,
layer_json, get_image_iterator, get_layer_iterator): layer_json, get_image_iterator, get_layer_iterator, get_image_json):
# Docker import V1 Format (.tar): # Docker import V1 Format (.tar):
# repositories - JSON file containing a repo -> tag -> image map # repositories - JSON file containing a repo -> tag -> image map
# {image ID folder}: # {image ID folder}:
@ -52,7 +53,14 @@ class SquashedDockerImage(TarImageFormatter):
# Yield the merged layer data's header. # Yield the merged layer data's header.
estimated_file_size = 0 estimated_file_size = 0
for image in get_image_iterator(): for image in get_image_iterator():
estimated_file_size += image.storage.uncompressed_size # In V1 we have the actual uncompressed size, which is needed for back compat with
# older versions of Docker.
# In V2, we use the size given in the image JSON.
if image.storage.uncompressed_size:
estimated_file_size += image.storage.uncompressed_size
else:
image_json = get_image_json(image)
estimated_file_size += image_json.get('Size', 0)
yield self.tar_file_header(synthetic_image_id + '/layer.tar', estimated_file_size) yield self.tar_file_header(synthetic_image_id + '/layer.tar', estimated_file_size)

View file

@ -5,16 +5,17 @@ class TarImageFormatter(object):
""" Base class for classes which produce a TAR containing image and layer data. """ """ Base class for classes which produce a TAR containing image and layer data. """
def build_stream(self, namespace, repository, tag, synthetic_image_id, layer_json, def build_stream(self, namespace, repository, tag, synthetic_image_id, layer_json,
get_image_iterator, get_layer_iterator): get_image_iterator, get_layer_iterator, get_image_json):
""" Builds and streams a synthetic .tar.gz that represents the formatted TAR created by this """ Builds and streams a synthetic .tar.gz that represents the formatted TAR created by this
class's implementation. class's implementation.
""" """
return GzipWrap(self.stream_generator(namespace, repository, tag, return GzipWrap(self.stream_generator(namespace, repository, tag,
synthetic_image_id, layer_json, synthetic_image_id, layer_json,
get_image_iterator, get_layer_iterator)) get_image_iterator, get_layer_iterator,
get_image_json))
def stream_generator(self, namespace, repository, tag, synthetic_image_id, def stream_generator(self, namespace, repository, tag, synthetic_image_id,
layer_json, get_image_iterator, get_layer_iterator): layer_json, get_image_iterator, get_layer_iterator, get_image_json):
raise NotImplementedError raise NotImplementedError
def tar_file(self, name, contents): def tar_file(self, name, contents):