Fix link-to-parent-with-different-blob issue and add a test

This commit is contained in:
Joseph Schorr 2016-03-14 15:24:18 -04:00
parent ba2851c952
commit 57e5141fb5
2 changed files with 165 additions and 15 deletions

View file

@ -124,7 +124,7 @@ class SignedManifest(object):
@property
def layers(self):
""" Returns a generator of objects that have the blobSum and v1Compatibility keys in them,
starting from the leaf image and working toward the base node.
starting from the base image and working toward the leaf node.
"""
for blob_sum_obj, history_obj in reversed(zip(self._parsed[_FS_LAYERS_KEY],
self._parsed[_HISTORY_KEY])):
@ -152,15 +152,9 @@ class SignedManifest(object):
@property
def payload(self):
protected = str(self._signatures[0][_PROTECTED_KEY])
parsed_protected = json.loads(jwt.utils.base64url_decode(protected))
logger.debug('parsed_protected: %s', parsed_protected)
signed_content_head = self._bytes[:parsed_protected[_FORMAT_LENGTH_KEY]]
logger.debug('signed content head: %s', signed_content_head)
signed_content_tail = jwt.utils.base64url_decode(str(parsed_protected[_FORMAT_TAIL_KEY]))
logger.debug('signed content tail: %s', signed_content_tail)
return signed_content_head + signed_content_tail
@ -189,6 +183,7 @@ class SignedManifestBuilder(object):
self._history.append({
_V1_COMPAT_KEY: v1_json_metadata,
})
return self
def build(self, json_web_key):
@ -354,7 +349,7 @@ def _updated_v1_metadata(v1_metadata_json, updated_id_map):
return json.dumps(parsed)
def _write_manifest(namespace_name, repo_name, manifest):
def _write_manifest_itself(namespace_name, repo_name, manifest):
# Ensure that the manifest is for this repository. If the manifest's namespace is empty, then
# it is for the library namespace and we need an extra check.
if (manifest.namespace == '' and features.LIBRARY_SUPPORT and
@ -396,11 +391,20 @@ def _write_manifest(namespace_name, repo_name, manifest):
has_rewritten_ids = False
updated_id_map = {}
# Synthesized image id hash. Can be used to pull a "content addressable" image id out of thin air.
digest_history = hashlib.sha256()
for mdata in layers:
digest_str = str(mdata.digest)
v1_mdata = mdata.v1_metadata
working_docker_id = v1_mdata.docker_id
# Update our digest_history hash for the new layer data.
digest_history.update(digest_str)
digest_history.update("@")
digest_history.update(mdata.v1_metadata_str.encode('utf-8'))
digest_history.update("|")
# Ensure that all blobs exist.
blob_storage = storage_map.get(digest_str)
if blob_storage is None:
@ -413,10 +417,9 @@ def _write_manifest(namespace_name, repo_name, manifest):
images_map[v1_mdata.docker_id].storage.content_checksum != digest_str) or
has_rewritten_ids):
v1_metadata_str = mdata.v1_metadata_str.encode('utf-8')
working_docker_id = hashlib.sha256(v1_metadata_str + '@' + digest_str).hexdigest()
logger.debug('Rewriting docker_id %s/%s %s -> %s', namespace_name, repo_name,
v1_mdata.docker_id, working_docker_id)
working_docker_id = digest_history.hexdigest()
logger.warning('Rewriting docker_id %s/%s %s -> %s', namespace_name, repo_name,
v1_mdata.docker_id, working_docker_id)
has_rewritten_ids = True
# Store the new docker id in the map
@ -431,9 +434,6 @@ def _write_manifest(namespace_name, repo_name, manifest):
raise ManifestInvalid(detail={'message': msg})
# Synthesize and store the v1 metadata in the db.
digest_str = str(mdata.digest)
blob_storage = storage_map[digest_str]
v1_metadata_json = mdata.v1_metadata_str
if has_rewritten_ids:
v1_metadata_json = _updated_v1_metadata(mdata.v1_metadata_str, updated_id_map)
@ -453,6 +453,11 @@ def _write_manifest(namespace_name, repo_name, manifest):
leaf_layer_id = images_map[layers[-1].v1_metadata.docker_id].docker_image_id
model.tag.store_tag_manifest(namespace_name, repo_name, tag_name, leaf_layer_id, manifest_digest,
manifest.bytes)
return (repo, tag_name, manifest_digest)
def _write_manifest(namespace_name, repo_name, manifest):
(repo, tag_name, manifest_digest) = _write_manifest_itself(namespace_name, repo_name, manifest)
# Spawn the repo_push event.
event_data = {

145
test/test_manifests.py Normal file
View file

@ -0,0 +1,145 @@
import unittest
import time
import hashlib
from app import app, storage, docker_v2_signing_key
from initdb import setup_database_for_testing, finished_database_for_testing
from data import model, database
from endpoints.v2.manifest import _write_manifest_itself, SignedManifestBuilder
# Test fixtures: the well-known development account and repository seeded by initdb.
ADMIN_ACCESS_USER = 'devtable'
REPO = 'simple'

# Tag names pushed in sequence by the manifest tests below.
FIRST_TAG = 'first'
SECOND_TAG = 'second'
THIRD_TAG = 'third'
class TestManifests(unittest.TestCase):
    """ Integration tests for V2 manifest writing, exercising docker-ID synthesis and
        garbage collection of blobs referenced by pushed manifests.
    """

    @staticmethod
    def _set_tag_expiration_policy(namespace, expiration_s):
        """ Sets the tag expiration policy (in seconds) for the given namespace so that
            expired/hidden tags become GC-able immediately when expiration_s is 0.
        """
        namespace_user = model.user.get_user(namespace)
        model.user.change_user_tag_expiration(namespace_user, expiration_s)

    def setUp(self):
        setup_database_for_testing(self)
        # Expire tags immediately so _perform_cleanup can reclaim unreferenced data.
        self._set_tag_expiration_policy(ADMIN_ACCESS_USER, 0)

        self.app = app.test_client()
        self.ctx = app.test_request_context()
        self.ctx.__enter__()

    def tearDown(self):
        finished_database_for_testing(self)
        self.ctx.__exit__(True, None, None)

    def _perform_cleanup(self):
        """ Deletes all temporary (hidden) tags and garbage collects the test repository,
            removing any blobs no longer referenced by a visible tag.
        """
        # NOTE: peewee requires the explicit `== True` comparison here; do not "fix" it.
        database.RepositoryTag.delete().where(database.RepositoryTag.hidden == True).execute()
        model.repository.garbage_collect_repository(ADMIN_ACCESS_USER, REPO)

    def test_missing_link(self):
        """ Tests for a corner case that could result in missing a link to a blob referenced by a
            manifest. The test exercises the case as follows:

            1) Push a manifest of a single layer with a Docker ID `FIRST_ID`, pointing
               to blob `FIRST_BLOB`. The database should contain the tag referencing the layer, with
               no changed ID and the blob not being GCed.

            2) Push a manifest of two layers:
               Layer 1: `FIRST_ID` with blob `SECOND_BLOB`: Will result in a new synthesized ID
               Layer 2: `SECOND_ID` with blob `THIRD_BLOB`: Will result in `SECOND_ID` pointing to the
                        `THIRD_BLOB`, with a parent pointing to the new synthesized ID's layer.

            3) Push a manifest of two layers:
               Layer 1: `THIRD_ID` with blob `FOURTH_BLOB`: Will result in a new `THIRD_ID` layer
               Layer 2: `FIRST_ID` with blob `THIRD_BLOB`: Since `FIRST_ID` already points to
                        `SECOND_BLOB`, this will synthesize a new ID. With the current bug, the
                        synthesized ID will match that of `SECOND_ID`, leaving `THIRD_ID` unlinked
                        and therefore, after a GC, missing `FOURTH_BLOB`.
        """
        location_name = storage.preferred_locations[0]
        location = database.ImageStorageLocation.get(name=location_name)

        # Create first blob.
        first_blob_sha = 'sha256:' + hashlib.sha256("FIRST").hexdigest()
        model.blob.store_blob_record_and_temp_link(ADMIN_ACCESS_USER, REPO, first_blob_sha,
                                                   location, 0, 0, 0)

        # Push the first manifest.
        first_manifest = (SignedManifestBuilder(ADMIN_ACCESS_USER, REPO, FIRST_TAG)
                          .add_layer(first_blob_sha, '{"id": "first"}')
                          .build(docker_v2_signing_key))

        _write_manifest_itself(ADMIN_ACCESS_USER, REPO, first_manifest)

        # Delete all temp tags and perform GC.
        self._perform_cleanup()

        # Ensure that the first blob still exists, along with the first tag.
        self.assertIsNotNone(model.blob.get_repo_blob_by_digest(ADMIN_ACCESS_USER, REPO,
                                                                first_blob_sha))
        self.assertIsNotNone(model.tag.load_tag_manifest(ADMIN_ACCESS_USER, REPO, FIRST_TAG))
        self.assertEqual("first", model.tag.get_tag_image(ADMIN_ACCESS_USER, REPO,
                                                          FIRST_TAG).docker_image_id)

        # Create the second and third blobs.
        second_blob_sha = 'sha256:' + hashlib.sha256("SECOND").hexdigest()
        third_blob_sha = 'sha256:' + hashlib.sha256("THIRD").hexdigest()

        model.blob.store_blob_record_and_temp_link(ADMIN_ACCESS_USER, REPO, second_blob_sha,
                                                   location, 0, 0, 0)
        model.blob.store_blob_record_and_temp_link(ADMIN_ACCESS_USER, REPO, third_blob_sha,
                                                   location, 0, 0, 0)

        # Push the second manifest.
        second_manifest = (SignedManifestBuilder(ADMIN_ACCESS_USER, REPO, SECOND_TAG)
                           .add_layer(third_blob_sha, '{"id": "second", "parent": "first"}')
                           .add_layer(second_blob_sha, '{"id": "first"}')
                           .build(docker_v2_signing_key))

        _write_manifest_itself(ADMIN_ACCESS_USER, REPO, second_manifest)

        # Delete all temp tags and perform GC.
        self._perform_cleanup()

        # Ensure that the first and second blobs still exist, along with the second tag.
        self.assertIsNotNone(model.blob.get_repo_blob_by_digest(ADMIN_ACCESS_USER, REPO,
                                                                first_blob_sha))
        self.assertIsNotNone(model.blob.get_repo_blob_by_digest(ADMIN_ACCESS_USER, REPO,
                                                                second_blob_sha))
        self.assertIsNotNone(model.blob.get_repo_blob_by_digest(ADMIN_ACCESS_USER, REPO,
                                                                third_blob_sha))
        self.assertIsNotNone(model.tag.load_tag_manifest(ADMIN_ACCESS_USER, REPO, FIRST_TAG))
        self.assertIsNotNone(model.tag.load_tag_manifest(ADMIN_ACCESS_USER, REPO, SECOND_TAG))
        self.assertEqual("first", model.tag.get_tag_image(ADMIN_ACCESS_USER, REPO,
                                                          FIRST_TAG).docker_image_id)

        # Ensure the IDs have changed.
        self.assertNotEqual("first", model.tag.get_tag_image(ADMIN_ACCESS_USER, REPO,
                                                             SECOND_TAG).parent.docker_image_id)
        self.assertNotEqual("second", model.tag.get_tag_image(ADMIN_ACCESS_USER, REPO,
                                                              SECOND_TAG).docker_image_id)

        # Create the fourth blob.
        fourth_blob_sha = 'sha256:' + hashlib.sha256("FOURTH").hexdigest()
        model.blob.store_blob_record_and_temp_link(ADMIN_ACCESS_USER, REPO, fourth_blob_sha,
                                                   location, 0, 0, 0)

        # Push the third manifest.
        third_manifest = (SignedManifestBuilder(ADMIN_ACCESS_USER, REPO, THIRD_TAG)
                          .add_layer(third_blob_sha, '{"id": "second", "parent": "first"}')
                          .add_layer(fourth_blob_sha, '{"id": "first"}')  # Note the change in BLOB from the second manifest.
                          .build(docker_v2_signing_key))

        _write_manifest_itself(ADMIN_ACCESS_USER, REPO, third_manifest)

        # Delete all temp tags and perform GC.
        self._perform_cleanup()

        # Ensure all blobs are present.
        self.assertIsNotNone(model.blob.get_repo_blob_by_digest(ADMIN_ACCESS_USER, REPO,
                                                                first_blob_sha))
        self.assertIsNotNone(model.blob.get_repo_blob_by_digest(ADMIN_ACCESS_USER, REPO,
                                                                second_blob_sha))
        self.assertIsNotNone(model.blob.get_repo_blob_by_digest(ADMIN_ACCESS_USER, REPO,
                                                                third_blob_sha))
        self.assertIsNotNone(model.blob.get_repo_blob_by_digest(ADMIN_ACCESS_USER, REPO,
                                                                fourth_blob_sha))

        # Ensure new synthesized IDs were created.
        self.assertNotEqual(
            model.tag.get_tag_image(ADMIN_ACCESS_USER, REPO, SECOND_TAG).docker_image_id,
            model.tag.get_tag_image(ADMIN_ACCESS_USER, REPO, THIRD_TAG).docker_image_id)
# Allow running this test module directly (outside the test runner).
if __name__ == '__main__':
    unittest.main()