From abd2e3c234b68d41c472c985fef1a059f3fc3db2 Mon Sep 17 00:00:00 2001
From: Joseph Schorr
Date: Fri, 12 Feb 2016 17:39:27 +0200
Subject: [PATCH 1/2] V1 Docker ID <-> V2 layer SHA mismatch fix

Fix handling of the V1 Docker ID <-> V2 layer SHA mismatch by dynamically
rewriting the manifest to use new synthesized IDs for all layers above the
mismatch.

Also adds a bunch of tests for this and other use cases, fixes a bug around
manifest digest uniqueness, and fixes the BlobUpload migration for MySQL 5.5.
---
 data/database.py                                   |   2 +-
 ...2f_add_created_field_to_the_blobupload_.py      |   2 +-
 ...77_remove_uniqueness_constraint_on_the_.py      |  28 ++++
 data/model/image.py                                |  13 +-
 endpoints/v2/manifest.py                           | 101 +++++++++---
 test/registry_tests.py                             | 147 ++++++++++++++----
 6 files changed, 240 insertions(+), 53 deletions(-)
 create mode 100644 data/migrations/versions/e4129c93e477_remove_uniqueness_constraint_on_the_.py

diff --git a/data/database.py b/data/database.py
index e4a0ad957..84f8b63db 100644
--- a/data/database.py
+++ b/data/database.py
@@ -642,7 +642,7 @@ class RepositoryTag(BaseModel):

 class TagManifest(BaseModel):
   tag = ForeignKeyField(RepositoryTag, index=True, unique=True)
-  digest = CharField(index=True, unique=True)
+  digest = CharField(index=True)
   json_data = TextField()

diff --git a/data/migrations/versions/88e0f440a2f_add_created_field_to_the_blobupload_.py b/data/migrations/versions/88e0f440a2f_add_created_field_to_the_blobupload_.py
index 1d4241b89..0fd70fc87 100644
--- a/data/migrations/versions/88e0f440a2f_add_created_field_to_the_blobupload_.py
+++ b/data/migrations/versions/88e0f440a2f_add_created_field_to_the_blobupload_.py
@@ -16,7 +16,7 @@ from datetime import datetime

 def upgrade(tables):
   ### commands auto generated by Alembic - please adjust! ###
-  now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+  now = datetime.now().strftime("'%Y-%m-%d %H:%M:%S'")
   op.add_column('blobupload', sa.Column('created', sa.DateTime(), nullable=False, server_default=sa.text(now)))
   op.create_index('blobupload_created', 'blobupload', ['created'], unique=False)
   ### end Alembic commands ###

diff --git a/data/migrations/versions/e4129c93e477_remove_uniqueness_constraint_on_the_.py b/data/migrations/versions/e4129c93e477_remove_uniqueness_constraint_on_the_.py
new file mode 100644
index 000000000..08cc02c7c
--- /dev/null
+++ b/data/migrations/versions/e4129c93e477_remove_uniqueness_constraint_on_the_.py
@@ -0,0 +1,28 @@
+"""Remove uniqueness constraint on the TagManifest digest column
+
+Revision ID: e4129c93e477
+Revises: 956a0833223
+Create Date: 2016-02-12 17:22:48.039791
+
+"""
+
+# revision identifiers, used by Alembic.
+revision = 'e4129c93e477'
+down_revision = '956a0833223'
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import mysql
+
+def upgrade(tables):
+  ### commands auto generated by Alembic - please adjust! ###
+  op.drop_index('tagmanifest_digest', table_name='tagmanifest')
+  op.create_index('tagmanifest_digest', 'tagmanifest', ['digest'], unique=False)
+  ### end Alembic commands ###
+
+
+def downgrade(tables):
+  ### commands auto generated by Alembic - please adjust! ###
+  op.drop_index('tagmanifest_digest', table_name='tagmanifest')
+  op.create_index('tagmanifest_digest', 'tagmanifest', ['digest'], unique=True)
+  ### end Alembic commands ###
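Note on the constraint being dropped above: a manifest digest is a pure function of the
manifest bytes, so pushing identical manifest content under two tags (or re-pushing a tag)
legitimately yields the same digest in multiple TagManifest rows. A minimal sketch of the
collision, using nothing beyond hashlib (manifest bytes are illustrative):

  import hashlib

  manifest_bytes = '{"schemaVersion": 1, "fsLayers": []}'

  # Two tags storing the same manifest content share one digest, so the
  # `digest` column cannot be unique across rows.
  digest_for_latest = 'sha256:' + hashlib.sha256(manifest_bytes).hexdigest()
  digest_for_stable = 'sha256:' + hashlib.sha256(manifest_bytes).hexdigest()
  assert digest_for_latest == digest_for_stable
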
diff --git a/data/model/image.py b/data/model/image.py
index 0665c5750..f8a699273 100644
--- a/data/model/image.py
+++ b/data/model/image.py
@@ -2,7 +2,7 @@ import logging
 import dateutil.parser
 import random

-from peewee import JOIN_LEFT_OUTER, SQL
+from peewee import JOIN_LEFT_OUTER, IntegrityError
 from datetime import datetime

 from data.model import (DataModelException, db_transaction, _basequery, storage,
@@ -405,10 +405,13 @@ def synthesize_v1_image(repo, image_storage, docker_image_id, created_date_str,
     aggregate_size = _basequery.calculate_image_aggregate_size(ancestors, image_storage.image_size,
                                                                parent_image)

-  return Image.create(docker_image_id=docker_image_id, ancestors=ancestors, comment=comment,
-                      command=command, v1_json_metadata=v1_json_metadata, created=created,
-                      storage=image_storage, repository=repo, parent=parent_image,
-                      aggregate_size=aggregate_size)
+  try:
+    return Image.create(docker_image_id=docker_image_id, ancestors=ancestors, comment=comment,
+                        command=command, v1_json_metadata=v1_json_metadata, created=created,
+                        storage=image_storage, repository=repo, parent=parent_image,
+                        aggregate_size=aggregate_size)
+  except IntegrityError:
+    return Image.get(docker_image_id=docker_image_id, repository=repo)


 def ensure_image_locations(*names):
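The change above is the usual get-or-create guard: when two pushes race to synthesize the
same V1 image, the loser's INSERT hits the unique constraint and falls back to reading the
winner's row. A minimal sketch of the pattern, assuming a peewee model constrained to be
unique over (docker_image_id, repository):

  from peewee import IntegrityError

  def get_or_create_image(docker_image_id, repo, **fields):
    try:
      return Image.create(docker_image_id=docker_image_id, repository=repo, **fields)
    except IntegrityError:
      # Another request won the race; return the row it created.
      return Image.get(docker_image_id=docker_image_id, repository=repo)
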
""" @@ -301,8 +314,8 @@ def _reject_manifest2_schema2(func): def write_manifest_by_tagname(namespace, repo_name, manifest_ref): try: manifest = SignedManifest(request.data) - except ValueError as ve: - raise ManifestInvalid() + except ValueError: + raise ManifestInvalid(detail={'message': 'could not parse manifest'}) if manifest.tag != manifest_ref: raise TagInvalid() @@ -320,14 +333,29 @@ def write_manifest_by_digest(namespace, repo_name, manifest_ref): try: manifest = SignedManifest(request.data) except ValueError: - raise ManifestInvalid() + raise ManifestInvalid(detail={'message': 'could not parse manifest'}) if manifest.digest != manifest_ref: - raise ManifestInvalid() + raise ManifestInvalid(detail={'message': 'manifest digest mismatch'}) return _write_manifest(namespace, repo_name, manifest) +def _updated_v1_metadata(v1_metadata_json, updated_id_map): + parsed = json.loads(v1_metadata_json) + parsed['id'] = updated_id_map[parsed['id']] + + if parsed.get('parent'): + parsed['parent'] = updated_id_map[parsed['parent']] + + if parsed.get('container_config', {}).get('Image'): + existing_image = parsed['container_config']['Image'] + if existing_image in updated_id_map: + parsed['container_config']['image'] = updated_id_map[existing_image] + + return json.dumps(parsed) + + def _write_manifest(namespace, repo_name, manifest): # Ensure that the manifest is for this repository. If the manifest's namespace is empty, then # it is for the library namespace and we need an extra check. @@ -363,22 +391,56 @@ def _write_manifest(namespace, repo_name, manifest): storage_query = model.storage.lookup_repo_storages_by_content_checksum(repo, checksums) storage_map = {storage.content_checksum: storage for storage in storage_query} - # Synthesize the V1 metadata for each layer. - manifest_digest = manifest.digest + # Ensure that we have valid V1 docker IDs. If Docker gives us a V1 layer ID pointing to + # a storage with a content checksum different from the existing, then we need to rewrite + # the Docker ID to ensure consistency. tag_name = manifest.tag + updated_manifest_builder = SignedManifestBuilder(namespace, repo_name, tag_name) + has_updated_manifest = False + updated_id_map = {} for mdata in layers: digest_str = str(mdata.digest) v1_mdata = mdata.v1_metadata + updated_id_map[v1_mdata.docker_id] = v1_mdata.docker_id + + # Ensure that all blobs exist. + blob_storage = storage_map.get(digest_str) + if blob_storage is None: + raise BlobUnknown(detail={'digest': digest_str}) - # If there is already a V1 image for this layer, nothing more to do. if v1_mdata.docker_id in images_map: # Ensure that the V1 image's storage matches the V2 blob. If not, we've found - # a data inconsistency and need to fail. + # a data inconsistency and need to create a new layer ID for the V1 image. v1_image = images_map[v1_mdata.docker_id] - if v1_image.storage.content_checksum != digest_str: - logger.error('Checksum mismatch on V1 layer %s (#%s): Expected digest %s, found %s', - v1_mdata.docker_id, v1_image.id, digest_str, v1_image.storage.content_checksum) + if has_updated_manifest or v1_image.storage.content_checksum != digest_str: + new_synthetic_id = hashlib.sha256(mdata.v1_metadata_str + '@' + digest_str).hexdigest() + logger.debug('Got content mismatch for layer %s under repo %s/%s. New ID: %s', + v1_mdata.docker_id, namespace, repo_name, new_synthetic_id) + + updated_id_map[v1_mdata.docker_id] = new_synthetic_id + has_updated_manifest = True + + # Update the manifest withn the new ID (if any). 
@@ -363,22 +391,56 @@ def _write_manifest(namespace, repo_name, manifest):
   storage_query = model.storage.lookup_repo_storages_by_content_checksum(repo, checksums)
   storage_map = {storage.content_checksum: storage for storage in storage_query}

-  # Synthesize the V1 metadata for each layer.
-  manifest_digest = manifest.digest
+  # Ensure that we have valid V1 docker IDs. If Docker gives us a V1 layer ID pointing to
+  # a storage with a content checksum different from the existing, then we need to rewrite
+  # the Docker ID to ensure consistency.
   tag_name = manifest.tag
+  updated_manifest_builder = SignedManifestBuilder(namespace, repo_name, tag_name)
+  has_updated_manifest = False
+  updated_id_map = {}
   for mdata in layers:
     digest_str = str(mdata.digest)
     v1_mdata = mdata.v1_metadata
+    updated_id_map[v1_mdata.docker_id] = v1_mdata.docker_id
+
+    # Ensure that all blobs exist.
+    blob_storage = storage_map.get(digest_str)
+    if blob_storage is None:
+      raise BlobUnknown(detail={'digest': digest_str})

-    # If there is already a V1 image for this layer, nothing more to do.
     if v1_mdata.docker_id in images_map:
       # Ensure that the V1 image's storage matches the V2 blob. If not, we've found
-      # a data inconsistency and need to fail.
+      # a data inconsistency and need to create a new layer ID for the V1 image.
       v1_image = images_map[v1_mdata.docker_id]
-      if v1_image.storage.content_checksum != digest_str:
-        logger.error('Checksum mismatch on V1 layer %s (#%s): Expected digest %s, found %s',
-                     v1_mdata.docker_id, v1_image.id, digest_str, v1_image.storage.content_checksum)
+      if has_updated_manifest or v1_image.storage.content_checksum != digest_str:
+        new_synthetic_id = hashlib.sha256(mdata.v1_metadata_str + '@' + digest_str).hexdigest()
+        logger.debug('Got content mismatch for layer %s under repo %s/%s. New ID: %s',
+                     v1_mdata.docker_id, namespace, repo_name, new_synthetic_id)
+
+        updated_id_map[v1_mdata.docker_id] = new_synthetic_id
+        has_updated_manifest = True
+
+    # Update the manifest with the new ID (if any).
+    v1_metadata_json = mdata.v1_metadata_str
+    if has_updated_manifest:
+      v1_metadata_json = _updated_v1_metadata(mdata.v1_metadata_str, updated_id_map)
+
+    updated_manifest_builder.add_top_layer(digest_str, v1_metadata_json)
+
+  # If the manifest was changed due to an updated layer ID, then create a new manifest
+  # based on the updated data.
+  if has_updated_manifest:
+    manifest = updated_manifest_builder.build(docker_v2_signing_key)
+    layers = list(manifest.layers)
+
+  # Synthesize the V1 metadata for each layer.
+  for mdata in layers:
+    v1_mdata = mdata.v1_metadata
+
+    # If the layer with the V1 id already exists, then nothing more to do. We've ensured
+    # it points to the correct content SHA above.
+    if v1_mdata.docker_id in images_map:
       continue

     # Lookup the parent image for the layer, if any.
     parent_image = None
@@ -390,10 +452,8 @@ def _write_manifest(namespace, repo_name, manifest):
       raise ManifestInvalid(detail={'message': msg})

     # Synthesize and store the v1 metadata in the db.
-    blob_storage = storage_map.get(digest_str)
-    if blob_storage is None:
-      raise BlobUnknown(detail={'digest': digest_str})
-
+    digest_str = str(mdata.digest)
+    blob_storage = storage_map[digest_str]
     image = model.image.synthesize_v1_image(repo, blob_storage, v1_mdata.docker_id,
                                             v1_mdata.created, v1_mdata.comment, v1_mdata.command,
                                             mdata.v1_metadata_str, parent_image)
@@ -405,9 +465,10 @@ def _write_manifest(namespace, repo_name, manifest):
     raise ManifestInvalid(detail={'message': 'manifest does not reference any layers'})

   # Store the manifest pointing to the tag.
+  manifest_digest = manifest.digest
   leaf_layer = layers[-1]
   model.tag.store_tag_manifest(namespace, repo_name, tag_name, leaf_layer.v1_metadata.docker_id,
-                               manifest_digest, request.data)
+                               manifest_digest, manifest.bytes)

   # Spawn the repo_push event.
   event_data = {
@@ -481,9 +542,11 @@ def _generate_and_store_manifest(namespace, repo_name, tag_name):
   try:
     return model.tag.associate_generated_tag_manifest(namespace, repo_name, tag_name,
                                                       manifest.digest, manifest.bytes)
-  except IntegrityError:
+  except IntegrityError as ie:
+    logger.debug('Got integrity error: %s', ie)
     try:
       return model.tag.load_tag_manifest(namespace, repo_name, tag_name)
     except model.InvalidManifestException:
+      logger.exception('Exception when generating manifest')
       raise model.DataModelException('Could not load or generate manifest')
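The synthesized ID above is derived deterministically from the layer's V1 metadata and its
V2 blob digest, so a retried push of the same content resolves to the same replacement ID
rather than accumulating new ones. The derivation in isolation (inputs illustrative; in the
endpoint they come from the parsed manifest):

  import hashlib

  v1_metadata_str = '{"id": "someid", "parent": "parentid"}'
  digest_str = 'sha256:deadbeef'

  new_synthetic_id = hashlib.sha256(v1_metadata_str + '@' + digest_str).hexdigest()
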
diff --git a/test/registry_tests.py b/test/registry_tests.py
index a5770bc8a..8f4f96d49 100644
--- a/test/registry_tests.py
+++ b/test/registry_tests.py
@@ -6,6 +6,7 @@ import random
 import string
 import resumablehashlib
 import binascii
+import uuid

 import Crypto.Random
 from cachetools import lru_cache
@@ -166,23 +167,29 @@ def _get_repo_name(namespace, name):
   return '%s/%s' % (namespace, name)


-def _get_full_contents(image_data):
+def _get_full_contents(image_data, additional_fields=False):
   if 'chunks' in image_data:
     # Data is just for chunking; no need for a real TAR.
     return image_data['contents']

   layer_data = StringIO()

-  tar_file_info = tarfile.TarInfo(name='contents')
-  tar_file_info.type = tarfile.REGTYPE
-  tar_file_info.size = len(image_data['contents'])
+  def add_file(name, contents):
+    tar_file_info = tarfile.TarInfo(name=name)
+    tar_file_info.type = tarfile.REGTYPE
+    tar_file_info.size = len(contents)

-  tar_file = tarfile.open(fileobj=layer_data, mode='w|gz')
-  tar_file.addfile(tar_file_info, StringIO(image_data['contents']))
-  tar_file.close()
+    tar_file = tarfile.open(fileobj=layer_data, mode='w|gz')
+    tar_file.addfile(tar_file_info, StringIO(contents))
+    tar_file.close()
+
+  add_file('contents', image_data['contents'])
+
+  if additional_fields:
+    add_file('anotherfile', str(uuid.uuid4()))

   layer_bytes = layer_data.getvalue()
   layer_data.close()
+
   return layer_bytes
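Appending one extra file with random contents is enough to change the compressed layer
bytes, and therefore the sha256 blob digest, while the tests keep reusing the same V1 image
IDs: exactly the ID <-> SHA mismatch the endpoint must now repair. A quick check using the
helper above:

  import hashlib

  layer_plain = _get_full_contents({'contents': 'the latest image'})
  layer_munged = _get_full_contents({'contents': 'the latest image'}, additional_fields=True)

  assert hashlib.sha256(layer_plain).hexdigest() != hashlib.sha256(layer_munged).hexdigest()
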
@@ -240,10 +247,10 @@ class RegistryTestCaseMixin(LiveServerTestCase):
     self.csrf_token = ''
     self.csrf_token = self.conduct('GET', '/__test/csrf').text

-  def do_tag(self, namespace, repository, tag, image_id, expected_code=200):
+  def do_tag(self, namespace, repository, tag, image_id, expected_code=200, auth='sig'):
     repo_name = _get_repo_name(namespace, repository)
     self.conduct('PUT', '/v1/repositories/%s/tags/%s' % (repo_name, tag),
-                 data='"%s"' % image_id, expected_code=expected_code, auth='sig')
+                 data='"%s"' % image_id, expected_code=expected_code, auth=auth)

   def conduct_api_login(self, username, password):
     self.conduct('POST', '/api/v1/signin',
@@ -256,6 +263,13 @@ class RegistryTestCaseMixin(LiveServerTestCase):
                  data=json.dumps(dict(visibility=visibility)),
                  headers={'Content-Type': 'application/json'})

+  def assertContents(self, image_data, response):
+    if 'chunks' in image_data:
+      return
+
+    tar = tarfile.open(fileobj=StringIO(response.content))
+    self.assertEquals(tar.extractfile('contents').read(), image_data['contents'])
+

 class BaseRegistryMixin(object):
   def conduct(self, method, url, headers=None, data=None, auth=None, params=None, expected_code=200,
@@ -311,7 +325,10 @@ class V1RegistryMixin(BaseRegistryMixin):


 class V1RegistryPushMixin(V1RegistryMixin):
-  def do_push(self, namespace, repository, username, password, images=None, expect_failure=None):
+  push_version = 'v1'
+
+  def do_push(self, namespace, repository, username, password, images=None, expect_failure=None,
+              munge_shas=False):
     images = images or self._get_default_images()
     auth = (username, password)
     repo_name = _get_repo_name(namespace, repository)
@@ -328,7 +345,6 @@ class V1RegistryPushMixin(V1RegistryMixin):
     if expected_code != 201:
       return

-    last_image_id = None
     for image_data in images:
       image_id = image_data['id']
       last_image_id = image_id
@@ -363,8 +379,10 @@ class V1RegistryPushMixin(V1RegistryMixin):


 class V1RegistryPullMixin(V1RegistryMixin):
+  pull_version = 'v1'
+
   def do_pull(self, namespace, repository, username=None, password='password', expect_failure=None,
-              images=None):
+              images=None, munge_shas=False):
     images = images or self._get_default_images()

     repo_name = _get_repo_name(namespace, repository)
@@ -377,27 +395,37 @@ class V1RegistryPullMixin(V1RegistryMixin):

     prefix = '/v1/repositories/%s/' % repo_name

-    # GET /v1/repositories/{namespace}/{repository}/
+    # GET /v1/repositories/{namespace}/{repository}/images
     expected_code = _get_expected_code(expect_failure, 1, 200)
     self.conduct('GET', prefix + 'images', auth=auth, expected_code=expected_code)
     if expected_code != 200:
       return

-    # GET /v1/repositories/{namespace}/{repository}/
-    result = json.loads(self.conduct('GET', prefix + 'tags',
-                                     auth='sig').text)
+    # GET /v1/repositories/{namespace}/{repository}/tags
+    tags_result = json.loads(self.conduct('GET', prefix + 'tags', auth='sig').text)
+    self.assertEquals(1, len(tags_result.values()))

-    self.assertEquals(len(images), len(result.values()))
+    # Ensure we do (or do not) have a matching image ID.
+    tag_image_id = tags_result['latest']
+    known_ids = [item['id'] for item in images]

-    for image_data in images:
-      image_id = image_data['id']
-      self.assertIn(image_id, result.values())
+    self.assertEquals(not munge_shas, tag_image_id in known_ids)

+    # Retrieve the ancestry of the tag image.
+    image_prefix = '/v1/images/%s/' % tag_image_id
+    ancestors = self.conduct('GET', image_prefix + 'ancestry', auth='sig').json()
+
+    for index, image_id in enumerate(ancestors):
       # /v1/images/{imageID}/{ancestry, json, layer}
       image_prefix = '/v1/images/%s/' % image_id
       self.conduct('GET', image_prefix + 'ancestry', auth='sig')
-      self.conduct('GET', image_prefix + 'json', auth='sig')
-      self.conduct('GET', image_prefix + 'layer', auth='sig')
+      response = self.conduct('GET', image_prefix + 'json', auth='sig')
+      self.assertEquals(image_id, response.json()['id'])
+
+      response = self.conduct('GET', image_prefix + 'layer', auth='sig')
+
+      # Ensure we can parse the layer bytes and that they contain the contents.
+      self.assertContents(images[index], response)


 class V2RegistryMixin(BaseRegistryMixin):
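The assertion above flips with munge_shas: after a munged push, the tag must resolve to a
server-synthesized ID rather than any ID the client sent. In miniature (values illustrative):

  munge_shas = True
  known_ids = ['latestid', 'baseid']  # IDs the client pushed
  tag_image_id = 'a1b2c3d4e5f6'       # synthesized server-side after the mismatch

  assert (tag_image_id in known_ids) == (not munge_shas)
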
@@ -474,8 +502,11 @@ class V2RegistryMixin(BaseRegistryMixin):


 class V2RegistryPushMixin(V2RegistryMixin):
+  push_version = 'v2'
+
   def do_push(self, namespace, repository, username, password, images=None, tag_name=None,
-              cancel=False, invalid=False, expect_failure=None, scopes=None):
+              cancel=False, invalid=False, expect_failure=None, scopes=None,
+              munge_shas=False):
     images = images or self._get_default_images()

     repo_name = _get_repo_name(namespace, repository)
@@ -499,8 +530,7 @@ class V2RegistryPushMixin(V2RegistryMixin):

     full_contents = {}
     for image_data in images:
-      full_contents[image_data['id']] = _get_full_contents(image_data)
-
+      full_contents[image_data['id']] = _get_full_contents(image_data, additional_fields=munge_shas)
       checksum = 'sha256:' + hashlib.sha256(full_contents[image_data['id']]).hexdigest()
       if invalid:
         checksum = 'sha256:' + hashlib.sha256('foobarbaz').hexdigest()
@@ -595,8 +625,10 @@ class V2RegistryPushMixin(V2RegistryMixin):


 class V2RegistryPullMixin(V2RegistryMixin):
+  pull_version = 'v2'
+
   def do_pull(self, namespace, repository, username=None, password='password', expect_failure=None,
-              manifest_id=None, images=None):
+              manifest_id=None, images=None, munge_shas=False):
     images = images or self._get_default_images()

     repo_name = _get_repo_name(namespace, repository)
@@ -630,12 +662,13 @@ class V2RegistryPullMixin(V2RegistryMixin):

     # Verify the layers.
     blobs = {}
-    for layer in manifest_data['fsLayers']:
+    for index, layer in enumerate(manifest_data['fsLayers']):
       blob_id = layer['blobSum']
       result = self.conduct('GET', '/v2/%s/blobs/%s' % (repo_name, blob_id),
                             expected_code=200, auth='jwt')
       blobs[blob_id] = result.content
+      self.assertContents(images[index], result)

     # Verify the V1 metadata is present for each expected image.
     found_v1_layers = set()
@@ -645,7 +678,7 @@ class V2RegistryPullMixin(V2RegistryMixin):
       found_v1_layers.add(v1_history['id'])

     for image in images:
-      self.assertIn(image['id'], found_v1_layers)
+      self.assertEquals(not munge_shas, image['id'] in found_v1_layers)

     return blobs
@@ -687,6 +720,35 @@ class V2RegistryLoginMixin(object):


 class RegistryTestsMixin(object):
+  def test_push_same_ids_different_sha(self):
+    if self.push_version == 'v1':
+      # No SHAs to munge in V1.
+      return
+
+    images = [
+      {
+        'id': 'latestid',
+        'contents': 'the latest image',
+        'parent': 'baseid',
+      },
+      {
+        'id': 'baseid',
+        'contents': 'The base image',
+      }
+    ]
+
+    # Push a new repository.
+    self.do_push('public', 'newrepo', 'public', 'password', images=images)
+
+    # Pull the repository.
+    self.do_pull('public', 'newrepo', 'public', 'password', images=images)
+
+    # Push the repository again, but with different SHAs.
+    self.do_push('public', 'newrepo', 'public', 'password', images=images, munge_shas=True)
+
+    # Pull the repository.
+    self.do_pull('public', 'newrepo', 'public', 'password', images=images, munge_shas=True)
+
   def test_push_pull_logging(self):
     # Push a new repository.
     self.do_push('public', 'newrepo', 'public', 'password')
@@ -986,6 +1048,27 @@ class V2RegistryTests(V2RegistryPullMixin, V2RegistryPushMixin, RegistryTestsMix
                       RegistryTestCaseMixin, LiveServerTestCase):
   """ Tests for V2 registry. """

+  def test_invalid_blob(self):
+    namespace = 'devtable'
+    repository = 'somerepo'
+    tag_name = 'sometag'
+
+    repo_name = _get_repo_name(namespace, repository)
+
+    self.v2_ping()
+    self.do_auth('devtable', 'password', namespace, repository, scopes=['push', 'pull'])
+
+    # Build a fake manifest.
+    builder = SignedManifestBuilder(namespace, repository, tag_name)
+    builder.add_layer('sha256:' + hashlib.sha256('invalid').hexdigest(), json.dumps({'id': 'foo'}))
+    manifest = builder.build(_JWK)
+
+    response = self.conduct('PUT', '/v2/%s/manifests/%s' % (repo_name, tag_name),
+                            data=manifest.bytes, expected_code=404,
+                            headers={'Content-Type': 'application/json'}, auth='jwt')
+    self.assertEquals('BLOB_UNKNOWN', response.json()['errors'][0]['code'])
+
+
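The PUT above is expected to fail with a 404 whose body follows the V2 registry error
format; roughly the following shape (exact message text may differ by implementation):

  expected_error = {
    'errors': [{
      'code': 'BLOB_UNKNOWN',
      'message': 'blob unknown to registry',
      'detail': {'digest': 'sha256:...'},
    }],
  }
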
""" + images = self._get_default_images() + + self.do_push('devtable', 'newrepo', 'devtable', 'password', images=images) + self.do_pull('devtable', 'newrepo', 'devtable', 'password', images=images) + + self.do_tag('devtable', 'newrepo', 'latest', images[0]['id'], auth=('devtable', 'password')) + self.do_pull('devtable', 'newrepo', 'devtable', 'password', images=images) + class V1PullV2PushRegistryTests(V1RegistryPullMixin, V2RegistryPushMixin, RegistryTestsMixin, RegistryTestCaseMixin, LiveServerTestCase): From 6454b5aeb7860fe2a03578020002a9512f786765 Mon Sep 17 00:00:00 2001 From: Jake Moshenko Date: Fri, 12 Feb 2016 15:57:44 -0500 Subject: [PATCH 2/2] Update the layer rename PR to preserve the original manifest --- endpoints/v2/manifest.py | 70 ++++++++++++++-------------------------- test/registry_tests.py | 2 +- 2 files changed, 25 insertions(+), 47 deletions(-) diff --git a/endpoints/v2/manifest.py b/endpoints/v2/manifest.py index e22a7d423..df116a87d 100644 --- a/endpoints/v2/manifest.py +++ b/endpoints/v2/manifest.py @@ -185,13 +185,6 @@ class SignedManifestBuilder(object): _V1_COMPAT_KEY: v1_json_metadata, }) - def add_top_layer(self, layer_digest, v1_json_metadata): - self._fs_layer_digests.insert(0, { - _BLOB_SUM_KEY: layer_digest, - }) - self._history.insert(0, { - _V1_COMPAT_KEY: v1_json_metadata, - }) def build(self, json_web_key): """ Build the payload and sign it, returning a SignedManifest object. @@ -395,53 +388,33 @@ def _write_manifest(namespace, repo_name, manifest): # a storage with a content checksum different from the existing, then we need to rewrite # the Docker ID to ensure consistency. tag_name = manifest.tag - updated_manifest_builder = SignedManifestBuilder(namespace, repo_name, tag_name) - has_updated_manifest = False + has_rewritten_ids = False updated_id_map = {} for mdata in layers: digest_str = str(mdata.digest) v1_mdata = mdata.v1_metadata - updated_id_map[v1_mdata.docker_id] = v1_mdata.docker_id + working_docker_id = v1_mdata.docker_id # Ensure that all blobs exist. blob_storage = storage_map.get(digest_str) if blob_storage is None: raise BlobUnknown(detail={'digest': digest_str}) - if v1_mdata.docker_id in images_map: - # Ensure that the V1 image's storage matches the V2 blob. If not, we've found - # a data inconsistency and need to create a new layer ID for the V1 image. - v1_image = images_map[v1_mdata.docker_id] - if has_updated_manifest or v1_image.storage.content_checksum != digest_str: - new_synthetic_id = hashlib.sha256(mdata.v1_metadata_str + '@' + digest_str).hexdigest() - logger.debug('Got content mismatch for layer %s under repo %s/%s. New ID: %s', - v1_mdata.docker_id, namespace, repo_name, new_synthetic_id) + # Ensure that the V1 image's storage matches the V2 blob. If not, we've found + # a data inconsistency and need to create a new layer ID for the V1 image, and all images + # that follow it in the ancestry chain. + if ((v1_mdata.docker_id in images_map and + images_map[v1_mdata.docker_id].storage.content_checksum != digest_str) or + has_rewritten_ids): - updated_id_map[v1_mdata.docker_id] = new_synthetic_id - has_updated_manifest = True + working_docker_id = hashlib.sha256(mdata.v1_metadata_str + '@' + digest_str).hexdigest() + logger.debug('Rewriting docker_id %s/%s %s -> %s', namespace, repo_name, v1_mdata.docker_id, + working_docker_id) + has_rewritten_ids = True - # Update the manifest withn the new ID (if any). 
diff --git a/endpoints/v2/manifest.py b/endpoints/v2/manifest.py
index e22a7d423..df116a87d 100644
--- a/endpoints/v2/manifest.py
+++ b/endpoints/v2/manifest.py
@@ -185,13 +185,6 @@ class SignedManifestBuilder(object):
       _V1_COMPAT_KEY: v1_json_metadata,
     })

-  def add_top_layer(self, layer_digest, v1_json_metadata):
-    self._fs_layer_digests.insert(0, {
-      _BLOB_SUM_KEY: layer_digest,
-    })
-    self._history.insert(0, {
-      _V1_COMPAT_KEY: v1_json_metadata,
-    })
-
   def build(self, json_web_key):
     """ Build the payload and sign it, returning a SignedManifest object.
@@ -395,53 +388,33 @@ def _write_manifest(namespace, repo_name, manifest):
   # a storage with a content checksum different from the existing, then we need to rewrite
   # the Docker ID to ensure consistency.
   tag_name = manifest.tag
-  updated_manifest_builder = SignedManifestBuilder(namespace, repo_name, tag_name)
-  has_updated_manifest = False
+  has_rewritten_ids = False
   updated_id_map = {}
   for mdata in layers:
     digest_str = str(mdata.digest)
     v1_mdata = mdata.v1_metadata
-    updated_id_map[v1_mdata.docker_id] = v1_mdata.docker_id
+    working_docker_id = v1_mdata.docker_id

     # Ensure that all blobs exist.
     blob_storage = storage_map.get(digest_str)
     if blob_storage is None:
       raise BlobUnknown(detail={'digest': digest_str})

-    if v1_mdata.docker_id in images_map:
-      # Ensure that the V1 image's storage matches the V2 blob. If not, we've found
-      # a data inconsistency and need to create a new layer ID for the V1 image.
-      v1_image = images_map[v1_mdata.docker_id]
-      if has_updated_manifest or v1_image.storage.content_checksum != digest_str:
-        new_synthetic_id = hashlib.sha256(mdata.v1_metadata_str + '@' + digest_str).hexdigest()
-        logger.debug('Got content mismatch for layer %s under repo %s/%s. New ID: %s',
-                     v1_mdata.docker_id, namespace, repo_name, new_synthetic_id)
+    # Ensure that the V1 image's storage matches the V2 blob. If not, we've found
+    # a data inconsistency and need to create a new layer ID for the V1 image, and all images
+    # that follow it in the ancestry chain.
+    if ((v1_mdata.docker_id in images_map and
+         images_map[v1_mdata.docker_id].storage.content_checksum != digest_str) or
+        has_rewritten_ids):

-        updated_id_map[v1_mdata.docker_id] = new_synthetic_id
-        has_updated_manifest = True
+      working_docker_id = hashlib.sha256(mdata.v1_metadata_str + '@' + digest_str).hexdigest()
+      logger.debug('Rewriting docker_id %s/%s %s -> %s', namespace, repo_name, v1_mdata.docker_id,
+                   working_docker_id)
+      has_rewritten_ids = True

-    # Update the manifest with the new ID (if any).
-    v1_metadata_json = mdata.v1_metadata_str
-    if has_updated_manifest:
-      v1_metadata_json = _updated_v1_metadata(mdata.v1_metadata_str, updated_id_map)
-
-    updated_manifest_builder.add_top_layer(digest_str, v1_metadata_json)
-
-  # If the manifest was changed due to an updated layer ID, then create a new manifest
-  # based on the updated data.
-  if has_updated_manifest:
-    manifest = updated_manifest_builder.build(docker_v2_signing_key)
-    layers = list(manifest.layers)
-
-  # Synthesize the V1 metadata for each layer.
-  for mdata in layers:
-    v1_mdata = mdata.v1_metadata
-
-    # If the layer with the V1 id already exists, then nothing more to do. We've ensured
-    # it points to the correct content SHA above.
-    if v1_mdata.docker_id in images_map:
-      continue
+    # Store the new docker id in the map
+    updated_id_map[v1_mdata.docker_id] = working_docker_id

     # Lookup the parent image for the layer, if any.
     parent_image = None
@@ -454,9 +427,14 @@ def _write_manifest(namespace, repo_name, manifest):
     # Synthesize and store the v1 metadata in the db.
     digest_str = str(mdata.digest)
     blob_storage = storage_map[digest_str]
-    image = model.image.synthesize_v1_image(repo, blob_storage, v1_mdata.docker_id,
+
+    v1_metadata_json = mdata.v1_metadata_str
+    if has_rewritten_ids:
+      v1_metadata_json = _updated_v1_metadata(mdata.v1_metadata_str, updated_id_map)
+
+    image = model.image.synthesize_v1_image(repo, blob_storage, working_docker_id,
                                             v1_mdata.created, v1_mdata.comment, v1_mdata.command,
-                                            mdata.v1_metadata_str, parent_image)
+                                            v1_metadata_json, parent_image)

     images_map[v1_mdata.docker_id] = image
@@ -466,9 +444,9 @@ def _write_manifest(namespace, repo_name, manifest):

   # Store the manifest pointing to the tag.
   manifest_digest = manifest.digest
-  leaf_layer = layers[-1]
-  model.tag.store_tag_manifest(namespace, repo_name, tag_name, leaf_layer.v1_metadata.docker_id,
-                               manifest_digest, manifest.bytes)
+  leaf_layer_id = images_map[layers[-1].v1_metadata.docker_id].docker_image_id
+  model.tag.store_tag_manifest(namespace, repo_name, tag_name, leaf_layer_id, manifest_digest,
+                               manifest.bytes)

   # Spawn the repo_push event.
   event_data = {
diff --git a/test/registry_tests.py b/test/registry_tests.py
index 8f4f96d49..d2507405a 100644
--- a/test/registry_tests.py
+++ b/test/registry_tests.py
@@ -678,7 +678,7 @@ class V2RegistryPullMixin(V2RegistryMixin):
       found_v1_layers.add(v1_history['id'])

     for image in images:
-      self.assertEquals(not munge_shas, image['id'] in found_v1_layers)
+      self.assertIn(image['id'], found_v1_layers)

     return blobs
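With the original manifest preserved, a V2 pull returns the client's own v1Compatibility
history even after a munged push, which is why the final hunk restores the plain membership
assertion. An illustrative check of that property:

  import json

  history_entry = {'v1Compatibility': json.dumps({'id': 'latestid', 'parent': 'baseid'})}
  found_v1_layers = {json.loads(history_entry['v1Compatibility'])['id']}

  assert 'latestid' in found_v1_layers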