From abd2e3c234b68d41c472c985fef1a059f3fc3db2 Mon Sep 17 00:00:00 2001
From: Joseph Schorr
Date: Fri, 12 Feb 2016 17:39:27 +0200
Subject: [PATCH 1/2] V1 Docker ID <-> V2 layer SHA mismatch fix

Fix handling of the V1 Docker ID <-> V2 layer SHA mismatch by dynamically
rewriting the manifest to use new synthesized IDs for all layers above the
mismatch.

Also adds a bunch of tests for this and other use cases, fixes a bug around
manifest digest uniqueness, and fixes the BlobUpload migration for MySQL 5.5.
---
 data/database.py                                   |   2 +-
 ...2f_add_created_field_to_the_blobupload_.py      |   2 +-
 ...77_remove_uniqueness_constraint_on_the_.py      |  28 ++++
 data/model/image.py                                |  13 +-
 endpoints/v2/manifest.py                           | 101 +++++++++---
 test/registry_tests.py                             | 147 ++++++++++++++----
 6 files changed, 240 insertions(+), 53 deletions(-)
 create mode 100644 data/migrations/versions/e4129c93e477_remove_uniqueness_constraint_on_the_.py

diff --git a/data/database.py b/data/database.py
index e4a0ad957..84f8b63db 100644
--- a/data/database.py
+++ b/data/database.py
@@ -642,7 +642,7 @@ class RepositoryTag(BaseModel):

 class TagManifest(BaseModel):
   tag = ForeignKeyField(RepositoryTag, index=True, unique=True)
-  digest = CharField(index=True, unique=True)
+  digest = CharField(index=True)
   json_data = TextField()

diff --git a/data/migrations/versions/88e0f440a2f_add_created_field_to_the_blobupload_.py b/data/migrations/versions/88e0f440a2f_add_created_field_to_the_blobupload_.py
index 1d4241b89..0fd70fc87 100644
--- a/data/migrations/versions/88e0f440a2f_add_created_field_to_the_blobupload_.py
+++ b/data/migrations/versions/88e0f440a2f_add_created_field_to_the_blobupload_.py
@@ -16,7 +16,7 @@ from datetime import datetime

 def upgrade(tables):
   ### commands auto generated by Alembic - please adjust! ###
-  now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+  now = datetime.now().strftime("'%Y-%m-%d %H:%M:%S'")
   op.add_column('blobupload', sa.Column('created', sa.DateTime(), nullable=False, server_default=sa.text(now)))
   op.create_index('blobupload_created', 'blobupload', ['created'], unique=False)
   ### end Alembic commands ###

diff --git a/data/migrations/versions/e4129c93e477_remove_uniqueness_constraint_on_the_.py b/data/migrations/versions/e4129c93e477_remove_uniqueness_constraint_on_the_.py
new file mode 100644
index 000000000..08cc02c7c
--- /dev/null
+++ b/data/migrations/versions/e4129c93e477_remove_uniqueness_constraint_on_the_.py
@@ -0,0 +1,28 @@
+"""Remove uniqueness constraint on the TagManifest digest column
+
+Revision ID: e4129c93e477
+Revises: 956a0833223
+Create Date: 2016-02-12 17:22:48.039791
+
+"""
+
+# revision identifiers, used by Alembic.
+revision = 'e4129c93e477'
+down_revision = '956a0833223'
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import mysql
+
+def upgrade(tables):
+  ### commands auto generated by Alembic - please adjust! ###
+  op.drop_index('tagmanifest_digest', table_name='tagmanifest')
+  op.create_index('tagmanifest_digest', 'tagmanifest', ['digest'], unique=False)
+  ### end Alembic commands ###
+
+
+def downgrade(tables):
+  ### commands auto generated by Alembic - please adjust! ###
+  op.drop_index('tagmanifest_digest', table_name='tagmanifest')
+  op.create_index('tagmanifest_digest', 'tagmanifest', ['digest'], unique=True)
+  ### end Alembic commands ###
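Note on the constraint being dropped above: a manifest digest is a pure function of the
manifest bytes, so pushing identical manifest content under two tags (or re-pushing a tag)
legitimately yields the same digest in multiple TagManifest rows. A minimal sketch of the
collision, using nothing beyond hashlib (manifest bytes are illustrative):

  import hashlib

  manifest_bytes = '{"schemaVersion": 1, "fsLayers": []}'

  # Two tags storing the same manifest content share one digest, so the
  # `digest` column cannot be unique across rows.
  digest_for_latest = 'sha256:' + hashlib.sha256(manifest_bytes).hexdigest()
  digest_for_stable = 'sha256:' + hashlib.sha256(manifest_bytes).hexdigest()
  assert digest_for_latest == digest_for_stable
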
diff --git a/data/model/image.py b/data/model/image.py
index 0665c5750..f8a699273 100644
--- a/data/model/image.py
+++ b/data/model/image.py
@@ -2,7 +2,7 @@ import logging
 import dateutil.parser
 import random

-from peewee import JOIN_LEFT_OUTER, SQL
+from peewee import JOIN_LEFT_OUTER, IntegrityError
 from datetime import datetime

 from data.model import (DataModelException, db_transaction, _basequery, storage,
@@ -405,10 +405,13 @@ def synthesize_v1_image(repo, image_storage, docker_image_id, created_date_str,
     aggregate_size = _basequery.calculate_image_aggregate_size(ancestors, image_storage.image_size,
                                                                parent_image)

-  return Image.create(docker_image_id=docker_image_id, ancestors=ancestors, comment=comment,
-                      command=command, v1_json_metadata=v1_json_metadata, created=created,
-                      storage=image_storage, repository=repo, parent=parent_image,
-                      aggregate_size=aggregate_size)
+  try:
+    return Image.create(docker_image_id=docker_image_id, ancestors=ancestors, comment=comment,
+                        command=command, v1_json_metadata=v1_json_metadata, created=created,
+                        storage=image_storage, repository=repo, parent=parent_image,
+                        aggregate_size=aggregate_size)
+  except IntegrityError:
+    return Image.get(docker_image_id=docker_image_id, repository=repo)


 def ensure_image_locations(*names):
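The change above is the usual get-or-create guard: when two pushes race to synthesize the
same V1 image, the loser's INSERT hits the unique constraint and falls back to reading the
winner's row. A minimal sketch of the pattern, assuming a peewee model constrained to be
unique over (docker_image_id, repository):

  from peewee import IntegrityError

  def get_or_create_image(docker_image_id, repo, **fields):
    try:
      return Image.create(docker_image_id=docker_image_id, repository=repo, **fields)
    except IntegrityError:
      # Another request won the race; return the row it created.
      return Image.get(docker_image_id=docker_image_id, repository=repo)
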
""" @@ -301,8 +314,8 @@ def _reject_manifest2_schema2(func): def write_manifest_by_tagname(namespace, repo_name, manifest_ref): try: manifest = SignedManifest(request.data) - except ValueError as ve: - raise ManifestInvalid() + except ValueError: + raise ManifestInvalid(detail={'message': 'could not parse manifest'}) if manifest.tag != manifest_ref: raise TagInvalid() @@ -320,14 +333,29 @@ def write_manifest_by_digest(namespace, repo_name, manifest_ref): try: manifest = SignedManifest(request.data) except ValueError: - raise ManifestInvalid() + raise ManifestInvalid(detail={'message': 'could not parse manifest'}) if manifest.digest != manifest_ref: - raise ManifestInvalid() + raise ManifestInvalid(detail={'message': 'manifest digest mismatch'}) return _write_manifest(namespace, repo_name, manifest) +def _updated_v1_metadata(v1_metadata_json, updated_id_map): + parsed = json.loads(v1_metadata_json) + parsed['id'] = updated_id_map[parsed['id']] + + if parsed.get('parent'): + parsed['parent'] = updated_id_map[parsed['parent']] + + if parsed.get('container_config', {}).get('Image'): + existing_image = parsed['container_config']['Image'] + if existing_image in updated_id_map: + parsed['container_config']['image'] = updated_id_map[existing_image] + + return json.dumps(parsed) + + def _write_manifest(namespace, repo_name, manifest): # Ensure that the manifest is for this repository. If the manifest's namespace is empty, then # it is for the library namespace and we need an extra check. @@ -363,22 +391,56 @@ def _write_manifest(namespace, repo_name, manifest): storage_query = model.storage.lookup_repo_storages_by_content_checksum(repo, checksums) storage_map = {storage.content_checksum: storage for storage in storage_query} - # Synthesize the V1 metadata for each layer. - manifest_digest = manifest.digest + # Ensure that we have valid V1 docker IDs. If Docker gives us a V1 layer ID pointing to + # a storage with a content checksum different from the existing, then we need to rewrite + # the Docker ID to ensure consistency. tag_name = manifest.tag + updated_manifest_builder = SignedManifestBuilder(namespace, repo_name, tag_name) + has_updated_manifest = False + updated_id_map = {} for mdata in layers: digest_str = str(mdata.digest) v1_mdata = mdata.v1_metadata + updated_id_map[v1_mdata.docker_id] = v1_mdata.docker_id + + # Ensure that all blobs exist. + blob_storage = storage_map.get(digest_str) + if blob_storage is None: + raise BlobUnknown(detail={'digest': digest_str}) - # If there is already a V1 image for this layer, nothing more to do. if v1_mdata.docker_id in images_map: # Ensure that the V1 image's storage matches the V2 blob. If not, we've found - # a data inconsistency and need to fail. + # a data inconsistency and need to create a new layer ID for the V1 image. v1_image = images_map[v1_mdata.docker_id] - if v1_image.storage.content_checksum != digest_str: - logger.error('Checksum mismatch on V1 layer %s (#%s): Expected digest %s, found %s', - v1_mdata.docker_id, v1_image.id, digest_str, v1_image.storage.content_checksum) + if has_updated_manifest or v1_image.storage.content_checksum != digest_str: + new_synthetic_id = hashlib.sha256(mdata.v1_metadata_str + '@' + digest_str).hexdigest() + logger.debug('Got content mismatch for layer %s under repo %s/%s. New ID: %s', + v1_mdata.docker_id, namespace, repo_name, new_synthetic_id) + + updated_id_map[v1_mdata.docker_id] = new_synthetic_id + has_updated_manifest = True + + # Update the manifest withn the new ID (if any). 
@@ -363,22 +391,56 @@ def _write_manifest(namespace, repo_name, manifest):
   storage_query = model.storage.lookup_repo_storages_by_content_checksum(repo, checksums)
   storage_map = {storage.content_checksum: storage for storage in storage_query}

-  # Synthesize the V1 metadata for each layer.
-  manifest_digest = manifest.digest
+  # Ensure that we have valid V1 docker IDs. If Docker gives us a V1 layer ID pointing to
+  # a storage with a content checksum different from the existing, then we need to rewrite
+  # the Docker ID to ensure consistency.
   tag_name = manifest.tag
+  updated_manifest_builder = SignedManifestBuilder(namespace, repo_name, tag_name)
+  has_updated_manifest = False
+  updated_id_map = {}
   for mdata in layers:
     digest_str = str(mdata.digest)
     v1_mdata = mdata.v1_metadata
+    updated_id_map[v1_mdata.docker_id] = v1_mdata.docker_id
+
+    # Ensure that all blobs exist.
+    blob_storage = storage_map.get(digest_str)
+    if blob_storage is None:
+      raise BlobUnknown(detail={'digest': digest_str})

-    # If there is already a V1 image for this layer, nothing more to do.
     if v1_mdata.docker_id in images_map:
       # Ensure that the V1 image's storage matches the V2 blob. If not, we've found
-      # a data inconsistency and need to fail.
+      # a data inconsistency and need to create a new layer ID for the V1 image.
       v1_image = images_map[v1_mdata.docker_id]
-      if v1_image.storage.content_checksum != digest_str:
-        logger.error('Checksum mismatch on V1 layer %s (#%s): Expected digest %s, found %s',
-                     v1_mdata.docker_id, v1_image.id, digest_str, v1_image.storage.content_checksum)
+      if has_updated_manifest or v1_image.storage.content_checksum != digest_str:
+        new_synthetic_id = hashlib.sha256(mdata.v1_metadata_str + '@' + digest_str).hexdigest()
+        logger.debug('Got content mismatch for layer %s under repo %s/%s. New ID: %s',
+                     v1_mdata.docker_id, namespace, repo_name, new_synthetic_id)
+
+        updated_id_map[v1_mdata.docker_id] = new_synthetic_id
+        has_updated_manifest = True
+
+    # Update the manifest with the new ID (if any).
+    v1_metadata_json = mdata.v1_metadata_str
+    if has_updated_manifest:
+      v1_metadata_json = _updated_v1_metadata(mdata.v1_metadata_str, updated_id_map)
+
+    updated_manifest_builder.add_top_layer(digest_str, v1_metadata_json)
+
+  # If the manifest was changed due to an updated layer ID, then create a new manifest
+  # based on the updated data.
+  if has_updated_manifest:
+    manifest = updated_manifest_builder.build(docker_v2_signing_key)
+    layers = list(manifest.layers)
+
+  # Synthesize the V1 metadata for each layer.
+  for mdata in layers:
+    v1_mdata = mdata.v1_metadata
+
+    # If the layer with the V1 id already exists, then nothing more to do. We've ensured
+    # it points to the correct content SHA above.
+    if v1_mdata.docker_id in images_map:
       continue

     # Lookup the parent image for the layer, if any.
     parent_image = None
@@ -390,10 +452,8 @@ def _write_manifest(namespace, repo_name, manifest):
       raise ManifestInvalid(detail={'message': msg})

     # Synthesize and store the v1 metadata in the db.
-    blob_storage = storage_map.get(digest_str)
-    if blob_storage is None:
-      raise BlobUnknown(detail={'digest': digest_str})
-
+    digest_str = str(mdata.digest)
+    blob_storage = storage_map[digest_str]
     image = model.image.synthesize_v1_image(repo, blob_storage, v1_mdata.docker_id,
                                             v1_mdata.created, v1_mdata.comment, v1_mdata.command,
                                             mdata.v1_metadata_str, parent_image)
@@ -405,9 +465,10 @@ def _write_manifest(namespace, repo_name, manifest):
     raise ManifestInvalid(detail={'message': 'manifest does not reference any layers'})

   # Store the manifest pointing to the tag.
+  manifest_digest = manifest.digest
   leaf_layer = layers[-1]
   model.tag.store_tag_manifest(namespace, repo_name, tag_name, leaf_layer.v1_metadata.docker_id,
-                               manifest_digest, request.data)
+                               manifest_digest, manifest.bytes)

   # Spawn the repo_push event.
   event_data = {
@@ -481,9 +542,11 @@ def _generate_and_store_manifest(namespace, repo_name, tag_name):
   try:
     return model.tag.associate_generated_tag_manifest(namespace, repo_name, tag_name,
                                                       manifest.digest, manifest.bytes)
-  except IntegrityError:
+  except IntegrityError as ie:
+    logger.debug('Got integrity error: %s', ie)
     try:
       return model.tag.load_tag_manifest(namespace, repo_name, tag_name)
     except model.InvalidManifestException:
+      logger.exception('Exception when generating manifest')
       raise model.DataModelException('Could not load or generate manifest')
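The synthesized ID above is derived deterministically from the layer's V1 metadata and its
V2 blob digest, so a retried push of the same content resolves to the same replacement ID
rather than accumulating new ones. The derivation in isolation (inputs illustrative; in the
endpoint they come from the parsed manifest):

  import hashlib

  v1_metadata_str = '{"id": "someid", "parent": "parentid"}'
  digest_str = 'sha256:deadbeef'

  new_synthetic_id = hashlib.sha256(v1_metadata_str + '@' + digest_str).hexdigest()
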
diff --git a/test/registry_tests.py b/test/registry_tests.py
index a5770bc8a..8f4f96d49 100644
--- a/test/registry_tests.py
+++ b/test/registry_tests.py
@@ -6,6 +6,7 @@ import random
 import string
 import resumablehashlib
 import binascii
+import uuid

 import Crypto.Random
 from cachetools import lru_cache
@@ -166,23 +167,29 @@ def _get_repo_name(namespace, name):
   return '%s/%s' % (namespace, name)


-def _get_full_contents(image_data):
+def _get_full_contents(image_data, additional_fields=False):
   if 'chunks' in image_data:
     # Data is just for chunking; no need for a real TAR.
     return image_data['contents']

   layer_data = StringIO()

-  tar_file_info = tarfile.TarInfo(name='contents')
-  tar_file_info.type = tarfile.REGTYPE
-  tar_file_info.size = len(image_data['contents'])
+  def add_file(name, contents):
+    tar_file_info = tarfile.TarInfo(name=name)
+    tar_file_info.type = tarfile.REGTYPE
+    tar_file_info.size = len(contents)

-  tar_file = tarfile.open(fileobj=layer_data, mode='w|gz')
-  tar_file.addfile(tar_file_info, StringIO(image_data['contents']))
-  tar_file.close()
+    tar_file = tarfile.open(fileobj=layer_data, mode='w|gz')
+    tar_file.addfile(tar_file_info, StringIO(contents))
+    tar_file.close()
+
+  add_file('contents', image_data['contents'])
+
+  if additional_fields:
+    add_file('anotherfile', str(uuid.uuid4()))

   layer_bytes = layer_data.getvalue()
   layer_data.close()
+
   return layer_bytes
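Appending one extra file with random contents is enough to change the compressed layer
bytes, and therefore the sha256 blob digest, while the tests keep reusing the same V1 image
IDs: exactly the ID <-> SHA mismatch the endpoint must now repair. A quick check using the
helper above:

  import hashlib

  layer_plain = _get_full_contents({'contents': 'the latest image'})
  layer_munged = _get_full_contents({'contents': 'the latest image'}, additional_fields=True)

  assert hashlib.sha256(layer_plain).hexdigest() != hashlib.sha256(layer_munged).hexdigest()
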
@@ -240,10 +247,10 @@ class RegistryTestCaseMixin(LiveServerTestCase):
     self.csrf_token = ''
     self.csrf_token = self.conduct('GET', '/__test/csrf').text

-  def do_tag(self, namespace, repository, tag, image_id, expected_code=200):
+  def do_tag(self, namespace, repository, tag, image_id, expected_code=200, auth='sig'):
     repo_name = _get_repo_name(namespace, repository)
     self.conduct('PUT', '/v1/repositories/%s/tags/%s' % (repo_name, tag),
-                 data='"%s"' % image_id, expected_code=expected_code, auth='sig')
+                 data='"%s"' % image_id, expected_code=expected_code, auth=auth)

   def conduct_api_login(self, username, password):
     self.conduct('POST', '/api/v1/signin',
@@ -256,6 +263,13 @@ class RegistryTestCaseMixin(LiveServerTestCase):
                  data=json.dumps(dict(visibility=visibility)),
                  headers={'Content-Type': 'application/json'})

+  def assertContents(self, image_data, response):
+    if 'chunks' in image_data:
+      return
+
+    tar = tarfile.open(fileobj=StringIO(response.content))
+    self.assertEquals(tar.extractfile('contents').read(), image_data['contents'])
+

 class BaseRegistryMixin(object):
   def conduct(self, method, url, headers=None, data=None, auth=None, params=None, expected_code=200,
@@ -311,7 +325,10 @@ class V1RegistryMixin(BaseRegistryMixin):


 class V1RegistryPushMixin(V1RegistryMixin):
-  def do_push(self, namespace, repository, username, password, images=None, expect_failure=None):
+  push_version = 'v1'
+
+  def do_push(self, namespace, repository, username, password, images=None, expect_failure=None,
+              munge_shas=False):
     images = images or self._get_default_images()
     auth = (username, password)
     repo_name = _get_repo_name(namespace, repository)
@@ -328,7 +345,6 @@ class V1RegistryPushMixin(V1RegistryMixin):
     if expected_code != 201:
       return

-    last_image_id = None
     for image_data in images:
       image_id = image_data['id']
       last_image_id = image_id
@@ -363,8 +379,10 @@ class V1RegistryPushMixin(V1RegistryMixin):


 class V1RegistryPullMixin(V1RegistryMixin):
+  pull_version = 'v1'
+
   def do_pull(self, namespace, repository, username=None, password='password', expect_failure=None,
-              images=None):
+              images=None, munge_shas=False):
     images = images or self._get_default_images()

     repo_name = _get_repo_name(namespace, repository)
@@ -377,27 +395,37 @@ class V1RegistryPullMixin(V1RegistryMixin):

     prefix = '/v1/repositories/%s/' % repo_name

-    # GET /v1/repositories/{namespace}/{repository}/
+    # GET /v1/repositories/{namespace}/{repository}/images
     expected_code = _get_expected_code(expect_failure, 1, 200)
     self.conduct('GET', prefix + 'images', auth=auth, expected_code=expected_code)
     if expected_code != 200:
       return

-    # GET /v1/repositories/{namespace}/{repository}/
-    result = json.loads(self.conduct('GET', prefix + 'tags',
-                                     auth='sig').text)
+    # GET /v1/repositories/{namespace}/{repository}/tags
+    tags_result = json.loads(self.conduct('GET', prefix + 'tags', auth='sig').text)
+    self.assertEquals(1, len(tags_result.values()))

-    self.assertEquals(len(images), len(result.values()))
+    # Ensure we do (or do not) have a matching image ID.
+    tag_image_id = tags_result['latest']
+    known_ids = [item['id'] for item in images]

-    for image_data in images:
-      image_id = image_data['id']
-      self.assertIn(image_id, result.values())
+    self.assertEquals(not munge_shas, tag_image_id in known_ids)

+    # Retrieve the ancestry of the tag image.
+    image_prefix = '/v1/images/%s/' % tag_image_id
+    ancestors = self.conduct('GET', image_prefix + 'ancestry', auth='sig').json()
+
+    for index, image_id in enumerate(ancestors):
       # /v1/images/{imageID}/{ancestry, json, layer}
       image_prefix = '/v1/images/%s/' % image_id
       self.conduct('GET', image_prefix + 'ancestry', auth='sig')
-      self.conduct('GET', image_prefix + 'json', auth='sig')
-      self.conduct('GET', image_prefix + 'layer', auth='sig')
+      response = self.conduct('GET', image_prefix + 'json', auth='sig')
+      self.assertEquals(image_id, response.json()['id'])
+
+      response = self.conduct('GET', image_prefix + 'layer', auth='sig')
+
+      # Ensure we can parse the layer bytes and that they contain the contents.
+      self.assertContents(images[index], response)


 class V2RegistryMixin(BaseRegistryMixin):
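The assertion above flips with munge_shas: after a munged push, the tag must resolve to a
server-synthesized ID rather than any ID the client sent. In miniature (values illustrative):

  munge_shas = True
  known_ids = ['latestid', 'baseid']  # IDs the client pushed
  tag_image_id = 'a1b2c3d4e5f6'       # synthesized server-side after the mismatch

  assert (tag_image_id in known_ids) == (not munge_shas)
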
@@ -474,8 +502,11 @@ class V2RegistryMixin(BaseRegistryMixin):


 class V2RegistryPushMixin(V2RegistryMixin):
+  push_version = 'v2'
+
   def do_push(self, namespace, repository, username, password, images=None, tag_name=None,
-              cancel=False, invalid=False, expect_failure=None, scopes=None):
+              cancel=False, invalid=False, expect_failure=None, scopes=None,
+              munge_shas=False):
     images = images or self._get_default_images()

     repo_name = _get_repo_name(namespace, repository)
@@ -499,8 +530,7 @@ class V2RegistryPushMixin(V2RegistryMixin):

     full_contents = {}
     for image_data in images:
-      full_contents[image_data['id']] = _get_full_contents(image_data)
-
+      full_contents[image_data['id']] = _get_full_contents(image_data, additional_fields=munge_shas)
       checksum = 'sha256:' + hashlib.sha256(full_contents[image_data['id']]).hexdigest()
       if invalid:
         checksum = 'sha256:' + hashlib.sha256('foobarbaz').hexdigest()
@@ -595,8 +625,10 @@ class V2RegistryPushMixin(V2RegistryMixin):


 class V2RegistryPullMixin(V2RegistryMixin):
+  pull_version = 'v2'
+
   def do_pull(self, namespace, repository, username=None, password='password', expect_failure=None,
-              manifest_id=None, images=None):
+              manifest_id=None, images=None, munge_shas=False):
     images = images or self._get_default_images()

     repo_name = _get_repo_name(namespace, repository)
@@ -630,12 +662,13 @@ class V2RegistryPullMixin(V2RegistryMixin):

     # Verify the layers.
     blobs = {}
-    for layer in manifest_data['fsLayers']:
+    for index, layer in enumerate(manifest_data['fsLayers']):
       blob_id = layer['blobSum']
       result = self.conduct('GET', '/v2/%s/blobs/%s' % (repo_name, blob_id),
                             expected_code=200, auth='jwt')
       blobs[blob_id] = result.content
+      self.assertContents(images[index], result)

     # Verify the V1 metadata is present for each expected image.
     found_v1_layers = set()
@@ -645,7 +678,7 @@ class V2RegistryPullMixin(V2RegistryMixin):
       found_v1_layers.add(v1_history['id'])

     for image in images:
-      self.assertIn(image['id'], found_v1_layers)
+      self.assertEquals(not munge_shas, image['id'] in found_v1_layers)

     return blobs
@@ -687,6 +720,35 @@ class V2RegistryLoginMixin(object):


 class RegistryTestsMixin(object):
+  def test_push_same_ids_different_sha(self):
+    if self.push_version == 'v1':
+      # No SHAs to munge in V1.
+      return
+
+    images = [
+      {
+        'id': 'latestid',
+        'contents': 'the latest image',
+        'parent': 'baseid',
+      },
+      {
+        'id': 'baseid',
+        'contents': 'The base image',
+      }
+    ]
+
+    # Push a new repository.
+    self.do_push('public', 'newrepo', 'public', 'password', images=images)
+
+    # Pull the repository.
+    self.do_pull('public', 'newrepo', 'public', 'password', images=images)
+
+    # Push the repository again, but with different SHAs.
+    self.do_push('public', 'newrepo', 'public', 'password', images=images, munge_shas=True)
+
+    # Pull the repository.
+    self.do_pull('public', 'newrepo', 'public', 'password', images=images, munge_shas=True)
+
   def test_push_pull_logging(self):
     # Push a new repository.
     self.do_push('public', 'newrepo', 'public', 'password')
@@ -986,6 +1048,27 @@ class V2RegistryTests(V2RegistryPullMixin, V2RegistryPushMixin, RegistryTestsMix
                       RegistryTestCaseMixin, LiveServerTestCase):
   """ Tests for V2 registry. """

+  def test_invalid_blob(self):
+    namespace = 'devtable'
+    repository = 'somerepo'
+    tag_name = 'sometag'
+
+    repo_name = _get_repo_name(namespace, repository)
+
+    self.v2_ping()
+    self.do_auth('devtable', 'password', namespace, repository, scopes=['push', 'pull'])
+
+    # Build a fake manifest.
+    builder = SignedManifestBuilder(namespace, repository, tag_name)
+    builder.add_layer('sha256:' + hashlib.sha256('invalid').hexdigest(), json.dumps({'id': 'foo'}))
+    manifest = builder.build(_JWK)
+
+    response = self.conduct('PUT', '/v2/%s/manifests/%s' % (repo_name, tag_name),
+                            data=manifest.bytes, expected_code=404,
+                            headers={'Content-Type': 'application/json'}, auth='jwt')
+    self.assertEquals('BLOB_UNKNOWN', response.json()['errors'][0]['code'])
+
+
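The PUT above is expected to fail with a 404 whose body follows the V2 registry error
format; roughly the following shape (exact message text may differ by implementation):

  expected_error = {
    'errors': [{
      'code': 'BLOB_UNKNOWN',
      'message': 'blob unknown to registry',
      'detail': {'digest': 'sha256:...'},
    }],
  }
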
""" + images = self._get_default_images() + + self.do_push('devtable', 'newrepo', 'devtable', 'password', images=images) + self.do_pull('devtable', 'newrepo', 'devtable', 'password', images=images) + + self.do_tag('devtable', 'newrepo', 'latest', images[0]['id'], auth=('devtable', 'password')) + self.do_pull('devtable', 'newrepo', 'devtable', 'password', images=images) + class V1PullV2PushRegistryTests(V1RegistryPullMixin, V2RegistryPushMixin, RegistryTestsMixin, RegistryTestCaseMixin, LiveServerTestCase): From 6454b5aeb7860fe2a03578020002a9512f786765 Mon Sep 17 00:00:00 2001 From: Jake Moshenko Date: Fri, 12 Feb 2016 15:57:44 -0500 Subject: [PATCH 2/2] Update the layer rename PR to preserve the original manifest --- endpoints/v2/manifest.py | 70 ++++++++++++++-------------------------- test/registry_tests.py | 2 +- 2 files changed, 25 insertions(+), 47 deletions(-) diff --git a/endpoints/v2/manifest.py b/endpoints/v2/manifest.py index e22a7d423..df116a87d 100644 --- a/endpoints/v2/manifest.py +++ b/endpoints/v2/manifest.py @@ -185,13 +185,6 @@ class SignedManifestBuilder(object): _V1_COMPAT_KEY: v1_json_metadata, }) - def add_top_layer(self, layer_digest, v1_json_metadata): - self._fs_layer_digests.insert(0, { - _BLOB_SUM_KEY: layer_digest, - }) - self._history.insert(0, { - _V1_COMPAT_KEY: v1_json_metadata, - }) def build(self, json_web_key): """ Build the payload and sign it, returning a SignedManifest object. @@ -395,53 +388,33 @@ def _write_manifest(namespace, repo_name, manifest): # a storage with a content checksum different from the existing, then we need to rewrite # the Docker ID to ensure consistency. tag_name = manifest.tag - updated_manifest_builder = SignedManifestBuilder(namespace, repo_name, tag_name) - has_updated_manifest = False + has_rewritten_ids = False updated_id_map = {} for mdata in layers: digest_str = str(mdata.digest) v1_mdata = mdata.v1_metadata - updated_id_map[v1_mdata.docker_id] = v1_mdata.docker_id + working_docker_id = v1_mdata.docker_id # Ensure that all blobs exist. blob_storage = storage_map.get(digest_str) if blob_storage is None: raise BlobUnknown(detail={'digest': digest_str}) - if v1_mdata.docker_id in images_map: - # Ensure that the V1 image's storage matches the V2 blob. If not, we've found - # a data inconsistency and need to create a new layer ID for the V1 image. - v1_image = images_map[v1_mdata.docker_id] - if has_updated_manifest or v1_image.storage.content_checksum != digest_str: - new_synthetic_id = hashlib.sha256(mdata.v1_metadata_str + '@' + digest_str).hexdigest() - logger.debug('Got content mismatch for layer %s under repo %s/%s. New ID: %s', - v1_mdata.docker_id, namespace, repo_name, new_synthetic_id) + # Ensure that the V1 image's storage matches the V2 blob. If not, we've found + # a data inconsistency and need to create a new layer ID for the V1 image, and all images + # that follow it in the ancestry chain. + if ((v1_mdata.docker_id in images_map and + images_map[v1_mdata.docker_id].storage.content_checksum != digest_str) or + has_rewritten_ids): - updated_id_map[v1_mdata.docker_id] = new_synthetic_id - has_updated_manifest = True + working_docker_id = hashlib.sha256(mdata.v1_metadata_str + '@' + digest_str).hexdigest() + logger.debug('Rewriting docker_id %s/%s %s -> %s', namespace, repo_name, v1_mdata.docker_id, + working_docker_id) + has_rewritten_ids = True - # Update the manifest withn the new ID (if any). 
diff --git a/endpoints/v2/manifest.py b/endpoints/v2/manifest.py
index e22a7d423..df116a87d 100644
--- a/endpoints/v2/manifest.py
+++ b/endpoints/v2/manifest.py
@@ -185,13 +185,6 @@ class SignedManifestBuilder(object):
       _V1_COMPAT_KEY: v1_json_metadata,
     })

-  def add_top_layer(self, layer_digest, v1_json_metadata):
-    self._fs_layer_digests.insert(0, {
-      _BLOB_SUM_KEY: layer_digest,
-    })
-    self._history.insert(0, {
-      _V1_COMPAT_KEY: v1_json_metadata,
-    })
-
   def build(self, json_web_key):
     """ Build the payload and sign it, returning a SignedManifest object.
@@ -395,53 +388,33 @@ def _write_manifest(namespace, repo_name, manifest):
   # a storage with a content checksum different from the existing, then we need to rewrite
   # the Docker ID to ensure consistency.
   tag_name = manifest.tag
-  updated_manifest_builder = SignedManifestBuilder(namespace, repo_name, tag_name)
-  has_updated_manifest = False
+  has_rewritten_ids = False
   updated_id_map = {}
   for mdata in layers:
     digest_str = str(mdata.digest)
     v1_mdata = mdata.v1_metadata
-    updated_id_map[v1_mdata.docker_id] = v1_mdata.docker_id
+    working_docker_id = v1_mdata.docker_id

     # Ensure that all blobs exist.
     blob_storage = storage_map.get(digest_str)
     if blob_storage is None:
       raise BlobUnknown(detail={'digest': digest_str})

-    if v1_mdata.docker_id in images_map:
-      # Ensure that the V1 image's storage matches the V2 blob. If not, we've found
-      # a data inconsistency and need to create a new layer ID for the V1 image.
-      v1_image = images_map[v1_mdata.docker_id]
-      if has_updated_manifest or v1_image.storage.content_checksum != digest_str:
-        new_synthetic_id = hashlib.sha256(mdata.v1_metadata_str + '@' + digest_str).hexdigest()
-        logger.debug('Got content mismatch for layer %s under repo %s/%s. New ID: %s',
-                     v1_mdata.docker_id, namespace, repo_name, new_synthetic_id)
+    # Ensure that the V1 image's storage matches the V2 blob. If not, we've found
+    # a data inconsistency and need to create a new layer ID for the V1 image, and all images
+    # that follow it in the ancestry chain.
+    if ((v1_mdata.docker_id in images_map and
+         images_map[v1_mdata.docker_id].storage.content_checksum != digest_str) or
+        has_rewritten_ids):

-        updated_id_map[v1_mdata.docker_id] = new_synthetic_id
-        has_updated_manifest = True
+      working_docker_id = hashlib.sha256(mdata.v1_metadata_str + '@' + digest_str).hexdigest()
+      logger.debug('Rewriting docker_id %s/%s %s -> %s', namespace, repo_name, v1_mdata.docker_id,
+                   working_docker_id)
+      has_rewritten_ids = True

-    # Update the manifest with the new ID (if any).
-    v1_metadata_json = mdata.v1_metadata_str
-    if has_updated_manifest:
-      v1_metadata_json = _updated_v1_metadata(mdata.v1_metadata_str, updated_id_map)
-
-    updated_manifest_builder.add_top_layer(digest_str, v1_metadata_json)
-
-  # If the manifest was changed due to an updated layer ID, then create a new manifest
-  # based on the updated data.
-  if has_updated_manifest:
-    manifest = updated_manifest_builder.build(docker_v2_signing_key)
-    layers = list(manifest.layers)
-
-  # Synthesize the V1 metadata for each layer.
-  for mdata in layers:
-    v1_mdata = mdata.v1_metadata
-
-    # If the layer with the V1 id already exists, then nothing more to do. We've ensured
-    # it points to the correct content SHA above.
-    if v1_mdata.docker_id in images_map:
-      continue
+    # Store the new docker id in the map
+    updated_id_map[v1_mdata.docker_id] = working_docker_id

     # Lookup the parent image for the layer, if any.
     parent_image = None
@@ -454,9 +427,14 @@ def _write_manifest(namespace, repo_name, manifest):
     # Synthesize and store the v1 metadata in the db.
     digest_str = str(mdata.digest)
     blob_storage = storage_map[digest_str]
-    image = model.image.synthesize_v1_image(repo, blob_storage, v1_mdata.docker_id,
+
+    v1_metadata_json = mdata.v1_metadata_str
+    if has_rewritten_ids:
+      v1_metadata_json = _updated_v1_metadata(mdata.v1_metadata_str, updated_id_map)
+
+    image = model.image.synthesize_v1_image(repo, blob_storage, working_docker_id,
                                             v1_mdata.created, v1_mdata.comment, v1_mdata.command,
-                                            mdata.v1_metadata_str, parent_image)
+                                            v1_metadata_json, parent_image)

     images_map[v1_mdata.docker_id] = image
@@ -466,9 +444,9 @@ def _write_manifest(namespace, repo_name, manifest):

   # Store the manifest pointing to the tag.
   manifest_digest = manifest.digest
-  leaf_layer = layers[-1]
-  model.tag.store_tag_manifest(namespace, repo_name, tag_name, leaf_layer.v1_metadata.docker_id,
-                               manifest_digest, manifest.bytes)
+  leaf_layer_id = images_map[layers[-1].v1_metadata.docker_id].docker_image_id
+  model.tag.store_tag_manifest(namespace, repo_name, tag_name, leaf_layer_id, manifest_digest,
+                               manifest.bytes)

   # Spawn the repo_push event.
   event_data = {
diff --git a/test/registry_tests.py b/test/registry_tests.py
index 8f4f96d49..d2507405a 100644
--- a/test/registry_tests.py
+++ b/test/registry_tests.py
@@ -678,7 +678,7 @@ class V2RegistryPullMixin(V2RegistryMixin):
       found_v1_layers.add(v1_history['id'])

     for image in images:
-      self.assertEquals(not munge_shas, image['id'] in found_v1_layers)
+      self.assertIn(image['id'], found_v1_layers)

     return blobs
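With the original manifest preserved, a V2 pull returns the client's own v1Compatibility
history even after a munged push, which is why the final hunk restores the plain membership
assertion. An illustrative check of that property:

  import json

  history_entry = {'v1Compatibility': json.dumps({'id': 'latestid', 'parent': 'baseid'})}
  found_v1_layers = {json.loads(history_entry['v1Compatibility'])['id']}

  assert 'latestid' in found_v1_layers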