From 9669320df2682ddb006b0d59052c53f4383539b9 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Tue, 7 Aug 2018 13:06:30 -0400 Subject: [PATCH] Fix pushing of manifests whose layers share blobs If a blob was repeated previously, we would get a constraint error from the ManifestBlob table --- data/model/tag.py | 25 +++++++++++++++++-------- test/registry/registry_tests.py | 22 ++++++++++++++++++++++ 2 files changed, 39 insertions(+), 8 deletions(-) diff --git a/data/model/tag.py b/data/model/tag.py index 46a356f58..3af310853 100644 --- a/data/model/tag.py +++ b/data/model/tag.py @@ -568,6 +568,10 @@ def store_tag_manifest_for_repo(repository_id, tag_name, manifest, leaf_layer_id """ Stores a tag manifest for a specific tag name in the database. Returns the TagManifest object, as well as a boolean indicating whether the TagManifest was created. """ + # Lookup all blobs in the manifest. + blobs = ImageStorage.select().where(ImageStorage.content_checksum << list(manifest.blob_digests)) + blob_map = {blob.content_checksum: blob for blob in blobs} + docker_image_id = leaf_layer_id or manifest.leaf_layer_v1_image_id with db_transaction(): tag = create_or_update_tag_for_repo(repository_id, tag_name, docker_image_id, @@ -579,7 +583,7 @@ def store_tag_manifest_for_repo(repository_id, tag_name, manifest, leaf_layer_id manifest.save() return manifest, False except TagManifest.DoesNotExist: - return _create_manifest(tag, manifest), True + return _create_manifest(tag, manifest, blob_map), True def get_active_tag(namespace, repo_name, tag_name): @@ -602,26 +606,31 @@ def get_possibly_expired_tag(namespace, repo_name, tag_name): def associate_generated_tag_manifest(namespace, repo_name, tag_name, manifest): tag = get_active_tag(namespace, repo_name, tag_name) - return _create_manifest(tag, manifest) - -def _create_manifest(tag, manifest): # Lookup all blobs in the manifest. blobs = ImageStorage.select().where(ImageStorage.content_checksum << list(manifest.blob_digests)) - blob_map = {} - for blob in blobs: - blob_map[blob.content_checksum] = blob + blob_map = {blob.content_checksum: blob for blob in blobs} + return _create_manifest(tag, manifest, blob_map) + + +def _create_manifest(tag, manifest, blob_map): + media_type = Manifest.media_type.get_id(manifest.media_type) with db_transaction(): - media_type = Manifest.media_type.get_id(manifest.media_type) manifest_row = Manifest.create(digest=manifest.digest, repository=tag.repository, manifest_bytes=manifest.bytes, media_type=media_type) ManifestLegacyImage.create(manifest=manifest_row, repository=tag.repository, image=tag.image) + + blobs_created = set() for index, blob_digest in enumerate(reversed(manifest.blob_digests)): image_storage = blob_map.get(blob_digest) if image_storage is None: raise DataModelException('Missing blob for manifest') + if image_storage.id in blobs_created: + continue + + blobs_created.add(image_storage.id) ManifestBlob.create(manifest=manifest_row, repository=tag.repository, blob=image_storage, blob_index=index) diff --git a/test/registry/registry_tests.py b/test/registry/registry_tests.py index 9a2760b7d..6b47ad763 100644 --- a/test/registry/registry_tests.py +++ b/test/registry/registry_tests.py @@ -1104,3 +1104,25 @@ def test_login_scopes(username, password, scopes, expected_access, expect_succes {'SERVER_HOSTNAME': 'localhost:5000'}) assert payload is not None assert payload['access'] == expected_access + + +def test_push_pull_same_blobs(pusher, puller, liveserver_session, app_reloader): + """ Test: Push and pull of an image to a new repository where a blob is shared between layers. """ + credentials = ('devtable', 'password') + + layer_bytes = layer_bytes_for_contents('some contents') + images = [ + Image(id='parentid', bytes=layer_bytes, parent_id=None), + Image(id='someid', bytes=layer_bytes, parent_id='parentid'), + ] + + options = ProtocolOptions() + options.skip_head_checks = True # Since the blob will already exist. + + # Push a new repository. + pusher.push(liveserver_session, 'devtable', 'newrepo', 'latest', images, + credentials=credentials, options=options) + + # Pull the repository to verify. + puller.pull(liveserver_session, 'devtable', 'newrepo', 'latest', images, + credentials=credentials, options=options)