From 58e553838cd507e8112168c61c67c0686da9f725 Mon Sep 17 00:00:00 2001
From: Joseph Schorr
Date: Tue, 17 Jul 2018 16:40:56 -0400
Subject: [PATCH] Move manifest corner case tests to pytest

---
 .../v2/test/test_manifest_cornercases.py      | 138 +++++++++++++++++
 test/test_manifests.py                        | 146 ------------------
 2 files changed, 138 insertions(+), 146 deletions(-)
 create mode 100644 endpoints/v2/test/test_manifest_cornercases.py
 delete mode 100644 test/test_manifests.py

diff --git a/endpoints/v2/test/test_manifest_cornercases.py b/endpoints/v2/test/test_manifest_cornercases.py
new file mode 100644
index 000000000..a2deee92f
--- /dev/null
+++ b/endpoints/v2/test/test_manifest_cornercases.py
@@ -0,0 +1,138 @@
+import hashlib
+
+from contextlib import contextmanager
+
+from app import storage, docker_v2_signing_key
+from data import model, database
+from endpoints.v2.manifest import _write_manifest
+from image.docker.schema1 import DockerSchema1ManifestBuilder
+
+from test.fixtures import *
+
+
+ADMIN_ACCESS_USER = 'devtable'
+REPO = 'simple'
+FIRST_TAG = 'first'
+SECOND_TAG = 'second'
+THIRD_TAG = 'third'
+
+
+@contextmanager
+def set_tag_expiration_policy(namespace, expiration_s=0):
+  """ Sets the tag expiration of the user named `namespace` to `expiration_s` and then runs the
+      wrapped block. Nothing is restored when the block exits.
+  """
+  namespace_user = model.user.get_user(namespace)
+  model.user.change_user_tag_expiration(namespace_user, expiration_s)
+  yield
+
+
+def _perform_cleanup():
+  """ Deletes every hidden tag row and then garbage collects the `REPO` test repository. """
+  database.RepositoryTag.delete().where(database.RepositoryTag.hidden == True).execute()
+  repo_object = model.repository.get_repository(ADMIN_ACCESS_USER, REPO)
+  model.repository.garbage_collect_repo(repo_object)
+
+
+def test_missing_link(initialized_db):
+  """ Tests for a corner case that could result in missing a link to a blob referenced by a
+      manifest. The test exercises the case as follows:
+
+      1) Push a manifest of a single layer with a Docker ID `FIRST_ID`, pointing
+         to blob `FIRST_BLOB`. The database should contain the tag referencing the layer, with
+         no changed ID and the blob not being GCed.
+
+      2) Push a manifest of two layers:
+
+         Layer 1: `FIRST_ID` with blob `SECOND_BLOB`: Will result in a new synthesized ID
+         Layer 2: `SECOND_ID` with blob `THIRD_BLOB`: Will result in `SECOND_ID` pointing to the
+                  `THIRD_BLOB`, with a parent pointing to the new synthesized ID's layer.
+
+      3) Push a manifest of two layers:
+
+         Layer 1: `THIRD_ID` with blob `FOURTH_BLOB`: Will result in a new `THIRD_ID` layer
+         Layer 2: `FIRST_ID` with blob `THIRD_BLOB`: Since `FIRST_ID` already points to `SECOND_BLOB`,
+                  this will synthesize a new ID. With the current bug, the synthesized ID will match
+                  that of `SECOND_ID`, leaving `THIRD_ID` unlinked and therefore, after a GC, missing
+                  `FOURTH_BLOB`.
+  """
+  with set_tag_expiration_policy(ADMIN_ACCESS_USER, 0):
+    location_name = storage.preferred_locations[0]
+    location = database.ImageStorageLocation.get(name=location_name)
+
+    # Create first blob.
+    first_blob_sha = 'sha256:' + hashlib.sha256(b"FIRST").hexdigest()
+    model.blob.store_blob_record_and_temp_link(ADMIN_ACCESS_USER, REPO, first_blob_sha, location, 0, 0, 0)
+
+    # Push the first manifest.
+    first_manifest = (DockerSchema1ManifestBuilder(ADMIN_ACCESS_USER, REPO, FIRST_TAG)
+                      .add_layer(first_blob_sha, '{"id": "first"}')
+                      .build(docker_v2_signing_key))
+
+    _write_manifest(ADMIN_ACCESS_USER, REPO, first_manifest)
+
+    # Delete all temp tags and perform GC.
+    _perform_cleanup()
+
+    # Ensure that the first blob still exists, along with the first tag.
+    assert model.blob.get_repo_blob_by_digest(ADMIN_ACCESS_USER, REPO, first_blob_sha) is not None
+    assert model.tag.load_tag_manifest(ADMIN_ACCESS_USER, REPO, FIRST_TAG) is not None
+    assert model.tag.get_tag_image(ADMIN_ACCESS_USER, REPO, FIRST_TAG).docker_image_id == 'first'
+
+    # Create the second and third blobs.
+    second_blob_sha = 'sha256:' + hashlib.sha256(b"SECOND").hexdigest()
+    third_blob_sha = 'sha256:' + hashlib.sha256(b"THIRD").hexdigest()
+
+    model.blob.store_blob_record_and_temp_link(ADMIN_ACCESS_USER, REPO, second_blob_sha, location, 0, 0, 0)
+    model.blob.store_blob_record_and_temp_link(ADMIN_ACCESS_USER, REPO, third_blob_sha, location, 0, 0, 0)
+
+    # Push the second manifest.
+    second_manifest = (DockerSchema1ManifestBuilder(ADMIN_ACCESS_USER, REPO, SECOND_TAG)
+                       .add_layer(third_blob_sha, '{"id": "second", "parent": "first"}')
+                       .add_layer(second_blob_sha, '{"id": "first"}')
+                       .build(docker_v2_signing_key))
+
+    _write_manifest(ADMIN_ACCESS_USER, REPO, second_manifest)
+
+    # Delete all temp tags and perform GC.
+    _perform_cleanup()
+
+    # Ensure that the first, second and third blobs still exist, along with both tags.
+    assert model.blob.get_repo_blob_by_digest(ADMIN_ACCESS_USER, REPO, first_blob_sha) is not None
+    assert model.blob.get_repo_blob_by_digest(ADMIN_ACCESS_USER, REPO, second_blob_sha) is not None
+    assert model.blob.get_repo_blob_by_digest(ADMIN_ACCESS_USER, REPO, third_blob_sha) is not None
+
+    assert model.tag.load_tag_manifest(ADMIN_ACCESS_USER, REPO, FIRST_TAG) is not None
+    assert model.tag.load_tag_manifest(ADMIN_ACCESS_USER, REPO, SECOND_TAG) is not None
+
+    assert model.tag.get_tag_image(ADMIN_ACCESS_USER, REPO, FIRST_TAG).docker_image_id == 'first'
+
+    # Ensure the IDs have changed.
+    assert model.tag.get_tag_image(ADMIN_ACCESS_USER, REPO, SECOND_TAG).parent.docker_image_id != 'first'
+    assert model.tag.get_tag_image(ADMIN_ACCESS_USER, REPO, SECOND_TAG).docker_image_id != 'second'
+
+    # Create the fourth blob.
+    fourth_blob_sha = 'sha256:' + hashlib.sha256(b"FOURTH").hexdigest()
+    model.blob.store_blob_record_and_temp_link(ADMIN_ACCESS_USER, REPO, fourth_blob_sha, location, 0, 0, 0)
+
+    # Push the third manifest.
+    third_manifest = (DockerSchema1ManifestBuilder(ADMIN_ACCESS_USER, REPO, THIRD_TAG)
+                      .add_layer(third_blob_sha, '{"id": "second", "parent": "first"}')
+                      .add_layer(fourth_blob_sha, '{"id": "first"}') # Note the change in BLOB from the second manifest.
+                      .build(docker_v2_signing_key))
+
+    _write_manifest(ADMIN_ACCESS_USER, REPO, third_manifest)
+
+    # Delete all temp tags and perform GC.
+    _perform_cleanup()
+
+    # Ensure all blobs are present.
+    assert model.blob.get_repo_blob_by_digest(ADMIN_ACCESS_USER, REPO, first_blob_sha) is not None
+    assert model.blob.get_repo_blob_by_digest(ADMIN_ACCESS_USER, REPO, second_blob_sha) is not None
+    assert model.blob.get_repo_blob_by_digest(ADMIN_ACCESS_USER, REPO, third_blob_sha) is not None
+    assert model.blob.get_repo_blob_by_digest(ADMIN_ACCESS_USER, REPO, fourth_blob_sha) is not None
+
+    # Ensure new synthesized IDs were created.
+    second_tag_id = model.tag.get_tag_image(ADMIN_ACCESS_USER, REPO, SECOND_TAG).docker_image_id
+    third_tag_id = model.tag.get_tag_image(ADMIN_ACCESS_USER, REPO, THIRD_TAG).docker_image_id
+    assert second_tag_id != third_tag_id
diff --git a/test/test_manifests.py b/test/test_manifests.py
deleted file mode 100644
index 262aa810a..000000000
--- a/test/test_manifests.py
+++ /dev/null
@@ -1,146 +0,0 @@
-import unittest
-import hashlib
-
-from app import app, storage, docker_v2_signing_key
-from initdb import setup_database_for_testing, finished_database_for_testing
-from data import model, database
-from endpoints.v2.manifest import _write_manifest
-from image.docker.schema1 import DockerSchema1ManifestBuilder
-
-
-ADMIN_ACCESS_USER = 'devtable'
-REPO = 'simple'
-FIRST_TAG = 'first'
-SECOND_TAG = 'second'
-THIRD_TAG = 'third'
-
-
-class TestManifests(unittest.TestCase):
-  @staticmethod
-  def _set_tag_expiration_policy(namespace, expiration_s):
-    namespace_user = model.user.get_user(namespace)
-    model.user.change_user_tag_expiration(namespace_user, expiration_s)
-
-  def setUp(self):
-    setup_database_for_testing(self)
-
-    self._set_tag_expiration_policy(ADMIN_ACCESS_USER, 0)
-
-    self.app = app.test_client()
-    self.ctx = app.test_request_context()
-    self.ctx.__enter__()
-
-  def tearDown(self):
-    finished_database_for_testing(self)
-    self.ctx.__exit__(True, None, None)
-
-  def _perform_cleanup(self):
-    database.RepositoryTag.delete().where(database.RepositoryTag.hidden == True).execute()
-    repo_object = model.repository.get_repository(ADMIN_ACCESS_USER, REPO)
-    model.repository.garbage_collect_repo(repo_object)
-
-  def test_missing_link(self):
-    """ Tests for a corner case that could result in missing a link to a blob referenced by a
-        manifest. The test exercises the case as follows:
-
-        1) Push a manifest of a single layer with a Docker ID `FIRST_ID`, pointing
-           to blob `FIRST_BLOB`. The database should contain the tag referencing the layer, with
-           no changed ID and the blob not being GCed.
-
-        2) Push a manifest of two layers:
-
-           Layer 1: `FIRST_ID` with blob `SECOND_BLOB`: Will result in a new synthesized ID
-           Layer 2: `SECOND_ID` with blob `THIRD_BLOB`: Will result in `SECOND_ID` pointing to the
-                    `THIRD_BLOB`, with a parent pointing to the new synthesized ID's layer.
-
-        3) Push a manifest of two layers:
-
-           Layer 1: `THIRD_ID` with blob `FOURTH_BLOB`: Will result in a new `THIRD_ID` layer
-           Layer 2: `FIRST_ID` with blob `THIRD_BLOB`: Since `FIRST_ID` already points to `SECOND_BLOB`,
-                    this will synthesize a new ID. With the current bug, the synthesized ID will match
-                    that of `SECOND_ID`, leaving `THIRD_ID` unlinked and therefore, after a GC, missing
-                    `FOURTH_BLOB`.
-    """
-    location_name = storage.preferred_locations[0]
-    location = database.ImageStorageLocation.get(name=location_name)
-
-    # Create first blob.
-    first_blob_sha = 'sha256:' + hashlib.sha256("FIRST").hexdigest()
-    model.blob.store_blob_record_and_temp_link(ADMIN_ACCESS_USER, REPO, first_blob_sha, location, 0, 0, 0)
-
-    # Push the first manifest.
-    first_manifest = (DockerSchema1ManifestBuilder(ADMIN_ACCESS_USER, REPO, FIRST_TAG)
-                      .add_layer(first_blob_sha, '{"id": "first"}')
-                      .build(docker_v2_signing_key))
-
-    _write_manifest(ADMIN_ACCESS_USER, REPO, first_manifest)
-
-    # Delete all temp tags and perform GC.
-    self._perform_cleanup()
-
-    # Ensure that the first blob still exists, along with the first tag.
-    self.assertIsNotNone(model.blob.get_repo_blob_by_digest(ADMIN_ACCESS_USER, REPO, first_blob_sha))
-    self.assertIsNotNone(model.tag.load_tag_manifest(ADMIN_ACCESS_USER, REPO, FIRST_TAG))
-    self.assertEquals("first", model.tag.get_tag_image(ADMIN_ACCESS_USER, REPO, FIRST_TAG).docker_image_id)
-
-    # Create the second and third blobs.
-    second_blob_sha = 'sha256:' + hashlib.sha256("SECOND").hexdigest()
-    third_blob_sha = 'sha256:' + hashlib.sha256("THIRD").hexdigest()
-
-    model.blob.store_blob_record_and_temp_link(ADMIN_ACCESS_USER, REPO, second_blob_sha, location, 0, 0, 0)
-    model.blob.store_blob_record_and_temp_link(ADMIN_ACCESS_USER, REPO, third_blob_sha, location, 0, 0, 0)
-
-    # Push the second manifest.
-    second_manifest = (DockerSchema1ManifestBuilder(ADMIN_ACCESS_USER, REPO, SECOND_TAG)
-                       .add_layer(third_blob_sha, '{"id": "second", "parent": "first"}')
-                       .add_layer(second_blob_sha, '{"id": "first"}')
-                       .build(docker_v2_signing_key))
-
-    _write_manifest(ADMIN_ACCESS_USER, REPO, second_manifest)
-
-    # Delete all temp tags and perform GC.
-    self._perform_cleanup()
-
-    # Ensure that the first and second blobs still exists, along with the second tag.
-    self.assertIsNotNone(model.blob.get_repo_blob_by_digest(ADMIN_ACCESS_USER, REPO, first_blob_sha))
-    self.assertIsNotNone(model.blob.get_repo_blob_by_digest(ADMIN_ACCESS_USER, REPO, second_blob_sha))
-    self.assertIsNotNone(model.blob.get_repo_blob_by_digest(ADMIN_ACCESS_USER, REPO, third_blob_sha))
-
-    self.assertIsNotNone(model.tag.load_tag_manifest(ADMIN_ACCESS_USER, REPO, FIRST_TAG))
-    self.assertIsNotNone(model.tag.load_tag_manifest(ADMIN_ACCESS_USER, REPO, SECOND_TAG))
-
-    self.assertEquals("first", model.tag.get_tag_image(ADMIN_ACCESS_USER, REPO, FIRST_TAG).docker_image_id)
-
-    # Ensure the IDs have changed.
-    self.assertNotEquals("first", model.tag.get_tag_image(ADMIN_ACCESS_USER, REPO, SECOND_TAG).parent.docker_image_id)
-    self.assertNotEquals("second", model.tag.get_tag_image(ADMIN_ACCESS_USER, REPO, SECOND_TAG).docker_image_id)
-
-    # Create the fourth blob.
-    fourth_blob_sha = 'sha256:' + hashlib.sha256("FOURTH").hexdigest()
-    model.blob.store_blob_record_and_temp_link(ADMIN_ACCESS_USER, REPO, fourth_blob_sha, location, 0, 0, 0)
-
-    # Push the third manifest.
-    third_manifest = (DockerSchema1ManifestBuilder(ADMIN_ACCESS_USER, REPO, THIRD_TAG)
-                      .add_layer(third_blob_sha, '{"id": "second", "parent": "first"}')
-                      .add_layer(fourth_blob_sha, '{"id": "first"}') # Note the change in BLOB from the second manifest.
-                      .build(docker_v2_signing_key))
-
-    _write_manifest(ADMIN_ACCESS_USER, REPO, third_manifest)
-
-    # Delete all temp tags and perform GC.
-    self._perform_cleanup()
-
-    # Ensure all blobs are present.
-    self.assertIsNotNone(model.blob.get_repo_blob_by_digest(ADMIN_ACCESS_USER, REPO, first_blob_sha))
-    self.assertIsNotNone(model.blob.get_repo_blob_by_digest(ADMIN_ACCESS_USER, REPO, second_blob_sha))
-    self.assertIsNotNone(model.blob.get_repo_blob_by_digest(ADMIN_ACCESS_USER, REPO, third_blob_sha))
-    self.assertIsNotNone(model.blob.get_repo_blob_by_digest(ADMIN_ACCESS_USER, REPO, fourth_blob_sha))
-
-    # Ensure new synthesized IDs were created.
-    self.assertNotEquals(
-      model.tag.get_tag_image(ADMIN_ACCESS_USER, REPO, SECOND_TAG).docker_image_id,
-      model.tag.get_tag_image(ADMIN_ACCESS_USER, REPO, THIRD_TAG).docker_image_id)
-
-
-if __name__ == '__main__':
-  unittest.main()