Load images and storage references in bulk during V1 synthesis

Currently, we perform multiple queries for each layer, which makes synthesizing the V1 images much slower (especially cross-region).

Fixes #413
This commit is contained in:
Joseph Schorr 2015-09-29 17:53:39 -04:00
parent 78e8aefd45
commit 35c35d9913
4 changed files with 173 additions and 157 deletions

View file

@ -98,7 +98,7 @@ class SignedManifest(object):
@property
def layers(self):
""" Returns a generator of objects that have the blobSum and v1Compatibility keys in them,
starting from the root image and working toward the leaf node.
starting from the leaf image and working toward the base node.
"""
for blob_sum_obj, history_obj in reversed(zip(self._parsed[_FS_LAYERS_KEY],
self._parsed[_HISTORY_KEY])):
@ -258,42 +258,79 @@ def write_manifest_by_digest(namespace, repo_name, manifest_ref):
def _write_manifest(namespace, repo_name, manifest):
# Ensure that the manifest is for this repository.
if manifest.namespace != namespace or manifest.repo_name != repo_name:
raise NameInvalid()
# Ensure that the repository exists.
repo = model.repository.get_repository(namespace, repo_name)
if repo is None:
raise NameInvalid()
# Lookup all the images and their parent images (if any) inside the manifest. This will let us
# know which V1 images we need to synthesize and which ones are invalid.
layers = list(manifest.layers)
docker_image_ids = [mdata.v1_metadata.docker_id for mdata in layers]
parent_image_ids = [mdata.v1_metadata.parent for mdata in layers
if mdata.v1_metadata.parent]
all_image_ids = list(set(docker_image_ids + parent_image_ids))
images_query = model.image.lookup_repository_images(repo, all_image_ids)
images_map = {image.docker_image_id: image for image in images_query}
# Lookup the storages associated with each blob in the manifest.
checksums = [str(mdata.digest) for mdata in manifest.layers]
storage_query = model.storage.lookup_repo_storages_by_checksum(repo, checksums)
storage_map = {storage.checksum: storage for storage in storage_query}
# Synthesize the V1 metadata for each layer.
manifest_digest = manifest.digest
tag_name = manifest.tag
leaf_layer = None
try:
for mdata in manifest.layers:
# Store the v1 metadata in the db
v1_mdata = mdata.v1_metadata
digest_str = str(mdata.digest)
model.image.synthesize_v1_image(namespace, repo_name, digest_str, v1_mdata.docker_id,
v1_mdata.created, v1_mdata.comment, v1_mdata.command,
mdata.v1_metadata_str, v1_mdata.parent)
leaf_layer = mdata
for mdata in layers:
digest_str = str(mdata.digest)
v1_mdata = mdata.v1_metadata
except model.InvalidImageException:
raise BlobUnknown(detail={'digest': digest_str})
# If there is already a V1 image for this layer, nothing more to do.
if v1_mdata.docker_id in images_map:
continue
if leaf_layer is None:
# Lookup the parent image for the layer, if any.
parent_image = None
if v1_mdata.parent is not None:
parent_image = images_map.get(v1_mdata.parent)
if parent_image is None:
msg = 'Parent not found with docker image id {0}'.format(v1_mdata.parent)
raise ManifestInvalid(detail={'message': msg})
# Synthesize and store the v1 metadata in the db.
blob_storage = storage_map.get(digest_str)
if blob_storage is None:
raise BlobUnknown(detail={'digest': digest_str})
image = model.image.synthesize_v1_image(repo, blob_storage, v1_mdata.docker_id,
v1_mdata.created, v1_mdata.comment, v1_mdata.command,
mdata.v1_metadata_str, parent_image)
images_map[v1_mdata.docker_id] = image
if not layers:
# The manifest doesn't actually reference any layers!
raise ManifestInvalid(detail={'message': 'manifest does not reference any layers'})
# Store the manifest pointing to the tag.
leaf_layer = layers[0]
model.tag.store_tag_manifest(namespace, repo_name, tag_name, leaf_layer.v1_metadata.docker_id,
manifest_digest, request.data)
# Spawn the repo_push event.
repo = model.repository.get_repository(namespace, repo_name)
if repo is not None:
event_data = {
'updated_tags': [tag_name],
}
event_data = {
'updated_tags': [tag_name],
}
track_and_log('push_repo', repo)
spawn_notification(repo, 'repo_push', event_data)
track_and_log('push_repo', repo)
spawn_notification(repo, 'repo_push', event_data)
response = make_response('OK', 202)
response.headers['Docker-Content-Digest'] = manifest_digest