V1 Docker ID <-> V2 layer SHA mismatch fix

Fix handling of V1 Docker ID <-> V2 layer SHA mismatch by dynamically rewriting the manifest to use new synthesized IDs for all layers above the mismatch. Also add a number of tests for this and other use cases, fix a bug around manifest digest uniqueness, and fix the 5.5 migration for MySQL.
This commit is contained in:
Joseph Schorr 2016-02-12 17:39:27 +02:00
parent 8b61c69dad
commit abd2e3c234
6 changed files with 240 additions and 53 deletions

View file

@ -6,6 +6,7 @@ import random
import string
import resumablehashlib
import binascii
import uuid
import Crypto.Random
from cachetools import lru_cache
@ -166,23 +167,29 @@ def _get_repo_name(namespace, name):
return '%s/%s' % (namespace, name)
def _get_full_contents(image_data, additional_fields=False):
    """ Returns the raw bytes to upload as the layer for the given image data.

    If `image_data` has a 'chunks' key, its 'contents' value is returned untouched, since
    chunked data does not need a real TAR. Otherwise the contents are written to a gzipped
    TAR archive under the member name 'contents'. When `additional_fields` is True, an
    extra member named 'anotherfile' holding a random UUID is added to the same archive,
    so that the returned bytes differ from those produced for the same contents without it.
    """
    if 'chunks' in image_data:
        # Data is just for chunking; no need for a real TAR.
        return image_data['contents']

    members = [('contents', image_data['contents'])]
    if additional_fields:
        # TAR member payloads are byte strings; encode the UUID text explicitly.
        members.append(('anotherfile', str(uuid.uuid4()).encode('ascii')))

    layer_data = StringIO()
    try:
        # All members go into ONE archive. Opening a new 'w|gz' stream per member would
        # emit several concatenated archives, and a regular tar reader stops at the first
        # end-of-archive marker, hiding every member after 'contents'.
        with tarfile.open(fileobj=layer_data, mode='w|gz') as tar_file:
            for name, contents in members:
                tar_file_info = tarfile.TarInfo(name=name)
                tar_file_info.type = tarfile.REGTYPE
                tar_file_info.size = len(contents)
                tar_file.addfile(tar_file_info, StringIO(contents))

        return layer_data.getvalue()
    finally:
        layer_data.close()
@ -240,10 +247,10 @@ class RegistryTestCaseMixin(LiveServerTestCase):
self.csrf_token = ''
self.csrf_token = self.conduct('GET', '/__test/csrf').text
def do_tag(self, namespace, repository, tag, image_id, expected_code=200, auth='sig'):
    """ Issues a PUT to /v1/repositories/<repo>/tags/<tag> with the quoted image ID as body.

    `auth` is handed to `conduct` unchanged, and `expected_code` is the status code that
    is passed along as the expected result of the request.
    """
    tag_url = '/v1/repositories/%s/tags/%s' % (_get_repo_name(namespace, repository), tag)
    quoted_image_id = '"%s"' % image_id
    self.conduct('PUT', tag_url, data=quoted_image_id, expected_code=expected_code, auth=auth)
def conduct_api_login(self, username, password):
self.conduct('POST', '/api/v1/signin',
@ -256,6 +263,13 @@ class RegistryTestCaseMixin(LiveServerTestCase):
data=json.dumps(dict(visibility=visibility)),
headers={'Content-Type': 'application/json'})
def assertContents(self, image_data, response):
    """ Asserts that the layer bytes in `response` hold the contents of `image_data`.

    Nothing is checked when `image_data` has a 'chunks' key. Otherwise `response.content`
    is opened as a TAR archive and its 'contents' member must equal
    `image_data['contents']`.
    """
    if 'chunks' in image_data:
        return

    # Read the member inside a `with` block so the archive handle is always closed,
    # even if the member is missing or the bytes are not a valid TAR.
    with tarfile.open(fileobj=StringIO(response.content)) as tar:
        found_contents = tar.extractfile('contents').read()

    self.assertEquals(found_contents, image_data['contents'])
class BaseRegistryMixin(object):
def conduct(self, method, url, headers=None, data=None, auth=None, params=None, expected_code=200,
@ -311,7 +325,10 @@ class V1RegistryMixin(BaseRegistryMixin):
class V1RegistryPushMixin(V1RegistryMixin):
def do_push(self, namespace, repository, username, password, images=None, expect_failure=None):
push_version = 'v1'
def do_push(self, namespace, repository, username, password, images=None, expect_failure=None,
munge_shas=False):
images = images or self._get_default_images()
auth = (username, password)
repo_name = _get_repo_name(namespace, repository)
@ -328,7 +345,6 @@ class V1RegistryPushMixin(V1RegistryMixin):
if expected_code != 201:
return
last_image_id = None
for image_data in images:
image_id = image_data['id']
last_image_id = image_id
@ -363,8 +379,10 @@ class V1RegistryPushMixin(V1RegistryMixin):
class V1RegistryPullMixin(V1RegistryMixin):
pull_version = 'v1'
def do_pull(self, namespace, repository, username=None, password='password', expect_failure=None,
images=None):
images=None, munge_shas=False):
images = images or self._get_default_images()
repo_name = _get_repo_name(namespace, repository)
@ -377,27 +395,37 @@ class V1RegistryPullMixin(V1RegistryMixin):
prefix = '/v1/repositories/%s/' % repo_name
# GET /v1/repositories/{namespace}/{repository}/
# GET /v1/repositories/{namespace}/{repository}/images
expected_code = _get_expected_code(expect_failure, 1, 200)
self.conduct('GET', prefix + 'images', auth=auth, expected_code=expected_code)
if expected_code != 200:
return
# GET /v1/repositories/{namespace}/{repository}/
result = json.loads(self.conduct('GET', prefix + 'tags', auth='sig').text)
# GET /v1/repositories/{namespace}/{repository}/tags
tags_result = json.loads(self.conduct('GET', prefix + 'tags', auth='sig').text)
self.assertEquals(1, len(tags_result.values()))
self.assertEquals(len(images), len(result.values()))
# Ensure we do (or do not) have a matching image ID.
tag_image_id = tags_result['latest']
known_ids = [item['id'] for item in images]
for image_data in images:
image_id = image_data['id']
self.assertIn(image_id, result.values())
self.assertEquals(not munge_shas, tag_image_id in known_ids)
# Retrieve the ancestry of the tag image.
image_prefix = '/v1/images/%s/' % tag_image_id
ancestors = self.conduct('GET', image_prefix + 'ancestry', auth='sig').json()
for index, image_id in enumerate(ancestors):
# /v1/images/{imageID}/{ancestry, json, layer}
image_prefix = '/v1/images/%s/' % image_id
self.conduct('GET', image_prefix + 'ancestry', auth='sig')
self.conduct('GET', image_prefix + 'json', auth='sig')
self.conduct('GET', image_prefix + 'layer', auth='sig')
response = self.conduct('GET', image_prefix + 'json', auth='sig')
self.assertEquals(image_id, response.json()['id'])
response = self.conduct('GET', image_prefix + 'layer', auth='sig')
# Ensure we can parse the layer bytes and that they contain the contents.
self.assertContents(images[index], response)
class V2RegistryMixin(BaseRegistryMixin):
@ -474,8 +502,11 @@ class V2RegistryMixin(BaseRegistryMixin):
class V2RegistryPushMixin(V2RegistryMixin):
push_version = 'v2'
def do_push(self, namespace, repository, username, password, images=None, tag_name=None,
cancel=False, invalid=False, expect_failure=None, scopes=None):
cancel=False, invalid=False, expect_failure=None, scopes=None,
munge_shas=False):
images = images or self._get_default_images()
repo_name = _get_repo_name(namespace, repository)
@ -499,8 +530,7 @@ class V2RegistryPushMixin(V2RegistryMixin):
full_contents = {}
for image_data in images:
full_contents[image_data['id']] = _get_full_contents(image_data)
full_contents[image_data['id']] = _get_full_contents(image_data, additional_fields=munge_shas)
checksum = 'sha256:' + hashlib.sha256(full_contents[image_data['id']]).hexdigest()
if invalid:
checksum = 'sha256:' + hashlib.sha256('foobarbaz').hexdigest()
@ -595,8 +625,10 @@ class V2RegistryPushMixin(V2RegistryMixin):
class V2RegistryPullMixin(V2RegistryMixin):
pull_version = 'v2'
def do_pull(self, namespace, repository, username=None, password='password', expect_failure=None,
manifest_id=None, images=None):
manifest_id=None, images=None, munge_shas=False):
images = images or self._get_default_images()
repo_name = _get_repo_name(namespace, repository)
@ -630,12 +662,13 @@ class V2RegistryPullMixin(V2RegistryMixin):
# Verify the layers.
blobs = {}
for layer in manifest_data['fsLayers']:
for index, layer in enumerate(manifest_data['fsLayers']):
blob_id = layer['blobSum']
result = self.conduct('GET', '/v2/%s/blobs/%s' % (repo_name, blob_id),
expected_code=200, auth='jwt')
blobs[blob_id] = result.content
self.assertContents(images[index], result)
# Verify the V1 metadata is present for each expected image.
found_v1_layers = set()
@ -645,7 +678,7 @@ class V2RegistryPullMixin(V2RegistryMixin):
found_v1_layers.add(v1_history['id'])
for image in images:
self.assertIn(image['id'], found_v1_layers)
self.assertEquals(not munge_shas, image['id'] in found_v1_layers)
return blobs
@ -687,6 +720,35 @@ class V2RegistryLoginMixin(object):
class RegistryTestsMixin(object):
def test_push_same_ids_different_sha(self):
    """ Pushing the same image IDs a second time with different layer SHAs, then pulling. """
    if self.push_version == 'v1':
        # No SHAs to munge in V1.
        return

    base_image = {
        'id': 'baseid',
        'contents': 'The base image',
    }
    latest_image = {
        'id': 'latestid',
        'contents': 'the latest image',
        'parent': 'baseid',
    }
    images = [latest_image, base_image]
    repo_and_creds = ('public', 'newrepo', 'public', 'password')

    # Push a new repository and pull it back.
    self.do_push(*repo_and_creds, images=images)
    self.do_pull(*repo_and_creds, images=images)

    # Push the repository again, but with different SHAs, and pull it back.
    self.do_push(*repo_and_creds, images=images, munge_shas=True)
    self.do_pull(*repo_and_creds, images=images, munge_shas=True)
def test_push_pull_logging(self):
# Push a new repository.
self.do_push('public', 'newrepo', 'public', 'password')
@ -986,6 +1048,27 @@ class V2RegistryTests(V2RegistryPullMixin, V2RegistryPushMixin, RegistryTestsMix
RegistryTestCaseMixin, LiveServerTestCase):
""" Tests for V2 registry. """
def test_invalid_blob(self):
    """ Putting a manifest whose layer blob this test never uploads must yield BLOB_UNKNOWN. """
    namespace, repository, tag_name = 'devtable', 'somerepo', 'sometag'

    self.v2_ping()
    self.do_auth('devtable', 'password', namespace, repository, scopes=['push', 'pull'])

    # Build a fake manifest with a single layer pointing at a digest that is not pushed here.
    missing_digest = 'sha256:' + hashlib.sha256('invalid').hexdigest()
    builder = SignedManifestBuilder(namespace, repository, tag_name)
    builder.add_layer(missing_digest, json.dumps({'id': 'foo'}))
    manifest = builder.build(_JWK)

    # The PUT is expected to be rejected with a 404 carrying the BLOB_UNKNOWN error code.
    manifest_url = '/v2/%s/manifests/%s' % (_get_repo_name(namespace, repository), tag_name)
    response = self.conduct('PUT', manifest_url, data=manifest.bytes, expected_code=404,
                            headers={'Content-Type': 'application/json'}, auth='jwt')

    first_error = response.json()['errors'][0]
    self.assertEquals('BLOB_UNKNOWN', first_error['code'])
def test_delete_manifest(self):
# Push a new repo with the latest tag.
(_, digest) = self.do_push('devtable', 'newrepo', 'devtable', 'password')
@ -1213,6 +1296,16 @@ class V1PushV2PullRegistryTests(V2RegistryPullMixin, V1RegistryPushMixin, Regist
RegistryTestCaseMixin, LiveServerTestCase):
""" Tests for V1 push, V2 pull registry. """
def test_multiple_tag_with_pull(self):
    """ Tagging the same exact V1 tag multiple times and then pulling with V2. """
    repo_and_creds = ('devtable', 'newrepo', 'devtable', 'password')
    images = self._get_default_images()

    # Push the default images, then pull them back.
    self.do_push(*repo_and_creds, images=images)
    self.do_pull(*repo_and_creds, images=images)

    # Re-apply the 'latest' tag to the first image in the list, then pull once more.
    self.do_tag('devtable', 'newrepo', 'latest', images[0]['id'], auth=('devtable', 'password'))
    self.do_pull(*repo_and_creds, images=images)
class V1PullV2PushRegistryTests(V1RegistryPullMixin, V2RegistryPushMixin, RegistryTestsMixin,
RegistryTestCaseMixin, LiveServerTestCase):