From e33ccff8cbb8d6481be318c9b881a5eca6ef0a2b Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Tue, 7 Aug 2018 14:52:33 -0400 Subject: [PATCH 1/3] Fix query count in test --- endpoints/v2/test/test_manifest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/endpoints/v2/test/test_manifest.py b/endpoints/v2/test/test_manifest.py index 8f50fa190..d25a30e3d 100644 --- a/endpoints/v2/test/test_manifest.py +++ b/endpoints/v2/test/test_manifest.py @@ -52,4 +52,4 @@ def test_e2e_query_count_manifest_norewrite(client, app): conduct_call(client, 'v2.write_manifest_by_digest', url_for, 'PUT', params, expected_code=202, headers=headers, raw_body=tag_manifest.json_data) - assert counter.count < 15 + assert counter.count <= 15 From e3da522d26b60899b154be507cda2cc1a8caa3c5 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Tue, 7 Aug 2018 14:52:59 -0400 Subject: [PATCH 2/3] Rename store_tag_manifest to indicate clearly it is only to be used for testing --- data/model/tag.py | 7 +++---- data/model/test/test_gc.py | 4 ++-- data/model/test/test_tag.py | 4 ++-- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/data/model/tag.py b/data/model/tag.py index 3af310853..a1c1ae24d 100644 --- a/data/model/tag.py +++ b/data/model/tag.py @@ -549,8 +549,8 @@ def restore_tag_to_image(repo_obj, tag_name, docker_image_id): return existing_image -def store_tag_manifest(namespace_name, repository_name, tag_name, manifest, leaf_layer_id=None, - reversion=False): +def store_tag_manifest_for_testing(namespace_name, repository_name, tag_name, manifest, + leaf_layer_id=None): """ Stores a tag manifest for a specific tag name in the database. Returns the TagManifest object, as well as a boolean indicating whether the TagManifest was created. """ @@ -559,8 +559,7 @@ def store_tag_manifest(namespace_name, repository_name, tag_name, manifest, leaf except Repository.DoesNotExist: raise DataModelException('Invalid repository %s/%s' % (namespace_name, repository_name)) - return store_tag_manifest_for_repo(repo.id, tag_name, manifest, leaf_layer_id=leaf_layer_id, - reversion=False) + return store_tag_manifest_for_repo(repo.id, tag_name, manifest, leaf_layer_id=leaf_layer_id) def store_tag_manifest_for_repo(repository_id, tag_name, manifest, leaf_layer_id=None, diff --git a/data/model/test/test_gc.py b/data/model/test/test_gc.py index aa77a82df..a1a450fea 100644 --- a/data/model/test/test_gc.py +++ b/data/model/test/test_gc.py @@ -72,8 +72,8 @@ def store_tag_manifest(namespace, repo_name, tag_name, image_id): pass manifest = builder.build(docker_v2_signing_key) - manifest_row, _ = model.tag.store_tag_manifest(namespace, repo_name, tag_name, manifest, - leaf_layer_id=image_id) + manifest_row, _ = model.tag.store_tag_manifest_for_testing(namespace, repo_name, tag_name, + manifest, leaf_layer_id=image_id) return manifest_row diff --git a/data/model/test/test_tag.py b/data/model/test/test_tag.py index bb5c9d28b..3cba8e01f 100644 --- a/data/model/test/test_tag.py +++ b/data/model/test/test_tag.py @@ -13,7 +13,7 @@ from data.database import (Image, RepositoryTag, ImageStorage, Repository, Manif from data.model.repository import create_repository from data.model.tag import (list_active_repo_tags, create_or_update_tag, delete_tag, get_matching_tags, _tag_alive, get_matching_tags_for_images, - change_tag_expiration, get_active_tag, store_tag_manifest) + change_tag_expiration, get_active_tag, store_tag_manifest_for_testing) from data.model.image import find_create_or_link_image from image.docker.schema1 import DockerSchema1ManifestBuilder from util.timedeltastring import convert_to_timedelta @@ -234,7 +234,7 @@ def test_store_tag_manifest(initialized_db): find_create_or_link_image(image_id, repo, 'devtable', {}, 'local_us') manifest = builder.build(docker_v2_signing_key) - tag_manifest, _ = store_tag_manifest('devtable', 'simple', 'sometag', manifest) + tag_manifest, _ = store_tag_manifest_for_testing('devtable', 'simple', 'sometag', manifest) # Ensure we have the new-model expected rows. mapping_row = TagManifestToManifest.get(tag_manifest=tag_manifest) From 56222f95dcafb85c597f1b2e15e786bc3001da06 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Tue, 7 Aug 2018 16:28:50 -0400 Subject: [PATCH 3/3] Change manifest creation to take in the map of blobs that form the manifest We need to lookup the blobs *specific to the images in that manifest*, so we now pass them in from the locations in which we know that information --- data/database.py | 2 - ...c7_remove_blob_index_from_manifestblob_.py | 28 ++++++++++++ data/model/tag.py | 43 ++++++++----------- data/model/test/test_gc.py | 4 +- data/model/test/test_tag.py | 22 ++++++++-- endpoints/v2/manifest.py | 17 +++++--- endpoints/v2/models_interface.py | 2 +- endpoints/v2/models_pre_oci.py | 14 ++++-- 8 files changed, 89 insertions(+), 43 deletions(-) create mode 100644 data/migrations/versions/eafdeadcebc7_remove_blob_index_from_manifestblob_.py diff --git a/data/database.py b/data/database.py index efd015efd..866542c2b 100644 --- a/data/database.py +++ b/data/database.py @@ -1407,14 +1407,12 @@ class ManifestBlob(BaseModel): repository = ForeignKeyField(Repository, index=True) manifest = ForeignKeyField(Manifest) blob = ForeignKeyField(ImageStorage) - blob_index = IntegerField() # 0-indexed location of the blob in the manifest. class Meta: database = db read_slaves = (read_slave,) indexes = ( (('manifest', 'blob'), True), - (('manifest', 'blob_index'), True), ) diff --git a/data/migrations/versions/eafdeadcebc7_remove_blob_index_from_manifestblob_.py b/data/migrations/versions/eafdeadcebc7_remove_blob_index_from_manifestblob_.py new file mode 100644 index 000000000..adf38cefa --- /dev/null +++ b/data/migrations/versions/eafdeadcebc7_remove_blob_index_from_manifestblob_.py @@ -0,0 +1,28 @@ +"""Remove blob_index from ManifestBlob table + +Revision ID: eafdeadcebc7 +Revises: 9093adccc784 +Create Date: 2018-08-07 15:57:54.001225 + +""" + +# revision identifiers, used by Alembic. +revision = 'eafdeadcebc7' +down_revision = '9093adccc784' + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import mysql + +def upgrade(tables, tester): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_index('manifestblob_manifest_id_blob_index', table_name='manifestblob') + op.drop_column('manifestblob', 'blob_index') + # ### end Alembic commands ### + + +def downgrade(tables, tester): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('manifestblob', sa.Column('blob_index', mysql.INTEGER(display_width=11), autoincrement=False, nullable=False)) + op.create_index('manifestblob_manifest_id_blob_index', 'manifestblob', ['manifest_id', 'blob_index'], unique=True) + # ### end Alembic commands ### diff --git a/data/model/tag.py b/data/model/tag.py index a1c1ae24d..cd86e3dc4 100644 --- a/data/model/tag.py +++ b/data/model/tag.py @@ -4,7 +4,7 @@ from calendar import timegm from uuid import uuid4 from peewee import IntegrityError, JOIN, fn -from data.model import (image, db_transaction, DataModelException, _basequery, +from data.model import (image, storage, db_transaction, DataModelException, _basequery, InvalidManifestException, TagAlreadyCreatedException, StaleTagException, config) from data.database import (RepositoryTag, Repository, Image, ImageStorage, Namespace, TagManifest, @@ -550,7 +550,7 @@ def restore_tag_to_image(repo_obj, tag_name, docker_image_id): def store_tag_manifest_for_testing(namespace_name, repository_name, tag_name, manifest, - leaf_layer_id=None): + leaf_layer_id, storage_id_map): """ Stores a tag manifest for a specific tag name in the database. Returns the TagManifest object, as well as a boolean indicating whether the TagManifest was created. """ @@ -559,21 +559,16 @@ def store_tag_manifest_for_testing(namespace_name, repository_name, tag_name, ma except Repository.DoesNotExist: raise DataModelException('Invalid repository %s/%s' % (namespace_name, repository_name)) - return store_tag_manifest_for_repo(repo.id, tag_name, manifest, leaf_layer_id=leaf_layer_id) + return store_tag_manifest_for_repo(repo.id, tag_name, manifest, leaf_layer_id, storage_id_map) -def store_tag_manifest_for_repo(repository_id, tag_name, manifest, leaf_layer_id=None, +def store_tag_manifest_for_repo(repository_id, tag_name, manifest, leaf_layer_id, storage_id_map, reversion=False): """ Stores a tag manifest for a specific tag name in the database. Returns the TagManifest object, as well as a boolean indicating whether the TagManifest was created. """ - # Lookup all blobs in the manifest. - blobs = ImageStorage.select().where(ImageStorage.content_checksum << list(manifest.blob_digests)) - blob_map = {blob.content_checksum: blob for blob in blobs} - - docker_image_id = leaf_layer_id or manifest.leaf_layer_v1_image_id with db_transaction(): - tag = create_or_update_tag_for_repo(repository_id, tag_name, docker_image_id, + tag = create_or_update_tag_for_repo(repository_id, tag_name, leaf_layer_id, reversion=reversion) try: @@ -582,7 +577,7 @@ def store_tag_manifest_for_repo(repository_id, tag_name, manifest, leaf_layer_id manifest.save() return manifest, False except TagManifest.DoesNotExist: - return _create_manifest(tag, manifest, blob_map), True + return _create_manifest(tag, manifest, storage_id_map), True def get_active_tag(namespace, repo_name, tag_name): @@ -603,16 +598,12 @@ def get_possibly_expired_tag(namespace, repo_name, tag_name): Namespace.username == namespace)).get() -def associate_generated_tag_manifest(namespace, repo_name, tag_name, manifest): +def associate_generated_tag_manifest(namespace, repo_name, tag_name, manifest, storage_id_map): tag = get_active_tag(namespace, repo_name, tag_name) - - # Lookup all blobs in the manifest. - blobs = ImageStorage.select().where(ImageStorage.content_checksum << list(manifest.blob_digests)) - blob_map = {blob.content_checksum: blob for blob in blobs} - return _create_manifest(tag, manifest, blob_map) + return _create_manifest(tag, manifest, storage_id_map) -def _create_manifest(tag, manifest, blob_map): +def _create_manifest(tag, manifest, storage_id_map): media_type = Manifest.media_type.get_id(manifest.media_type) with db_transaction(): @@ -621,17 +612,17 @@ def _create_manifest(tag, manifest, blob_map): ManifestLegacyImage.create(manifest=manifest_row, repository=tag.repository, image=tag.image) blobs_created = set() - for index, blob_digest in enumerate(reversed(manifest.blob_digests)): - image_storage = blob_map.get(blob_digest) - if image_storage is None: - raise DataModelException('Missing blob for manifest') + for blob_digest in reversed(manifest.blob_digests): + image_storage_id = storage_id_map.get(blob_digest) + if image_storage_id is None: + logger.error('Missing blob for manifest `%s` in: %s', blob_digest, storage_id_map) + raise DataModelException('Missing blob for manifest `%s`' % blob_digest) - if image_storage.id in blobs_created: + if image_storage_id in blobs_created: continue - blobs_created.add(image_storage.id) - ManifestBlob.create(manifest=manifest_row, repository=tag.repository, blob=image_storage, - blob_index=index) + ManifestBlob.create(manifest=manifest_row, repository=tag.repository, blob=image_storage_id) + blobs_created.add(image_storage_id) tag_manifest = TagManifest.create(tag=tag, digest=manifest.digest, json_data=manifest.bytes) TagManifestToManifest.create(tag_manifest=tag_manifest, manifest=manifest_row) diff --git a/data/model/test/test_gc.py b/data/model/test/test_gc.py index a1a450fea..369cdc4d5 100644 --- a/data/model/test/test_gc.py +++ b/data/model/test/test_gc.py @@ -65,15 +65,17 @@ def create_image(docker_image_id, repository_obj, username): def store_tag_manifest(namespace, repo_name, tag_name, image_id): builder = DockerSchema1ManifestBuilder(namespace, repo_name, tag_name) + storage_id_map = {} try: image_storage = ImageStorage.select().where(~(ImageStorage.content_checksum >> None)).get() builder.add_layer(image_storage.content_checksum, '{"id": "foo"}') + storage_id_map[image_storage.content_checksum] = image_storage.id except ImageStorage.DoesNotExist: pass manifest = builder.build(docker_v2_signing_key) manifest_row, _ = model.tag.store_tag_manifest_for_testing(namespace, repo_name, tag_name, - manifest, leaf_layer_id=image_id) + manifest, image_id, storage_id_map) return manifest_row diff --git a/data/model/test/test_tag.py b/data/model/test/test_tag.py index 3cba8e01f..97bb8e154 100644 --- a/data/model/test/test_tag.py +++ b/data/model/test/test_tag.py @@ -220,21 +220,37 @@ def test_change_tag_expiration(expiration_offset, expected_offset, initialized_d assert (expected_end_date - end_date).total_seconds() < 5 # variance in test -def test_store_tag_manifest(initialized_db): +def random_storages(): + return list(ImageStorage.select().where(~(ImageStorage.content_checksum >> None)).limit(10)) + + +def repeated_storages(): + storages = list(ImageStorage.select().where(~(ImageStorage.content_checksum >> None)).limit(5)) + return storages + storages + + +@pytest.mark.parametrize('get_storages', [ + random_storages, + repeated_storages, +]) +def test_store_tag_manifest(get_storages, initialized_db): # Create a manifest with some layers. builder = DockerSchema1ManifestBuilder('devtable', 'simple', 'sometag') - storages = list(ImageStorage.select().where(~(ImageStorage.content_checksum >> None)).limit(10)) + storages = get_storages() assert storages repo = model.repository.get_repository('devtable', 'simple') + storage_id_map = {} for index, storage in enumerate(storages): image_id = 'someimage%s' % index builder.add_layer(storage.content_checksum, json.dumps({'id': image_id})) find_create_or_link_image(image_id, repo, 'devtable', {}, 'local_us') + storage_id_map[storage.content_checksum] = storage.id manifest = builder.build(docker_v2_signing_key) - tag_manifest, _ = store_tag_manifest_for_testing('devtable', 'simple', 'sometag', manifest) + tag_manifest, _ = store_tag_manifest_for_testing('devtable', 'simple', 'sometag', manifest, + manifest.leaf_layer_v1_image_id, storage_id_map) # Ensure we have the new-model expected rows. mapping_row = TagManifestToManifest.get(tag_manifest=tag_manifest) diff --git a/endpoints/v2/manifest.py b/endpoints/v2/manifest.py index 468a64cdf..a3a118c92 100644 --- a/endpoints/v2/manifest.py +++ b/endpoints/v2/manifest.py @@ -157,10 +157,10 @@ def _write_manifest(namespace_name, repo_name, manifest): raise ManifestInvalid(detail={'message': 'manifest does not reference any layers'}) # Ensure all the blobs in the manifest exist. - storage_map = model.lookup_blobs_by_digest(repo, manifest.checksums) + blob_map = model.lookup_blobs_by_digest(repo, manifest.checksums) for layer in manifest.layers: digest_str = str(layer.digest) - if digest_str not in storage_map: + if digest_str not in blob_map: raise BlobUnknown(detail={'digest': digest_str}) # Lookup all the images and their parent images (if any) inside the manifest. @@ -177,7 +177,7 @@ def _write_manifest(namespace_name, repo_name, manifest): if not rewritten_image.image_id in images_map: model.synthesize_v1_image( repo, - storage_map[rewritten_image.content_checksum], + blob_map[rewritten_image.content_checksum], rewritten_image.image_id, rewritten_image.created, rewritten_image.comment, @@ -191,7 +191,7 @@ def _write_manifest(namespace_name, repo_name, manifest): # Store the manifest pointing to the tag. leaf_layer_id = rewritten_images[-1].image_id - newly_created = model.save_manifest(repo, manifest.tag, manifest, leaf_layer_id) + newly_created = model.save_manifest(repo, manifest.tag, manifest, leaf_layer_id, blob_map) if newly_created: # TODO: make this batch labels = [] @@ -202,18 +202,18 @@ def _write_manifest(namespace_name, repo_name, manifest): model.create_manifest_labels(namespace_name, repo_name, manifest.digest, labels) - return repo, storage_map + return repo, blob_map def _write_manifest_and_log(namespace_name, repo_name, manifest): - repo, storage_map = _write_manifest(namespace_name, repo_name, manifest) + repo, blob_map = _write_manifest(namespace_name, repo_name, manifest) # Queue all blob manifests for replication. if features.STORAGE_REPLICATION: with queue_replication_batch(namespace_name) as queue_storage_replication: for layer in manifest.layers: digest_str = str(layer.digest) - queue_storage_replication(storage_map[digest_str]) + queue_storage_replication(blob_map[digest_str]) track_and_log('push_repo', repo, tag=manifest.tag) spawn_notification(repo, 'repo_push', {'updated_tags': [manifest.tag]}) @@ -254,6 +254,9 @@ def delete_manifest_by_digest(namespace_name, repo_name, manifest_ref): def _generate_and_store_manifest(namespace_name, repo_name, tag_name): + """ Generates and stores a manifest for an existing V1-only tag. """ + # TODO(jschorr): Remove once we are fully on Manifest-based model. + # Find the v1 metadata for this image and its parents. v1_metadata = model.get_docker_v1_metadata_by_tag(namespace_name, repo_name, tag_name) parents_v1_metadata = model.get_parents_docker_v1_metadata(namespace_name, repo_name, diff --git a/endpoints/v2/models_interface.py b/endpoints/v2/models_interface.py index 7ba41a504..59557801a 100644 --- a/endpoints/v2/models_interface.py +++ b/endpoints/v2/models_interface.py @@ -155,7 +155,7 @@ class DockerRegistryV2DataInterface(object): pass @abstractmethod - def save_manifest(self, repository, tag_name, manifest): + def save_manifest(self, repository, tag_name, manifest, blob_map): """ Saves a manifest, under the matching repository as a tag with the given name. diff --git a/endpoints/v2/models_pre_oci.py b/endpoints/v2/models_pre_oci.py index 81732052c..256fbbc8a 100644 --- a/endpoints/v2/models_pre_oci.py +++ b/endpoints/v2/models_pre_oci.py @@ -93,8 +93,15 @@ class PreOCIModel(DockerRegistryV2DataInterface): def create_manifest_and_update_tag(self, namespace_name, repo_name, tag_name, manifest): assert isinstance(manifest, ManifestInterface) + repo = model.repository.get_repository(namespace_name, repo_name) + if repo is None: + return + + blob_map = self.lookup_blobs_by_digest(repo, manifest.checksums) + storage_map = {blob.digest: blob.id for blob_digest, blob in blob_map.iteritems()} try: - model.tag.associate_generated_tag_manifest(namespace_name, repo_name, tag_name, manifest) + model.tag.associate_generated_tag_manifest(namespace_name, repo_name, tag_name, manifest, + storage_map) except IntegrityError: # It's already there! pass @@ -112,10 +119,11 @@ class PreOCIModel(DockerRegistryV2DataInterface): parent_image) return _docker_v1_metadata(repository.namespace_name, repository.name, repo_image) - def save_manifest(self, repository, tag_name, manifest, leaf_layer_id=None): + def save_manifest(self, repository, tag_name, manifest, leaf_layer_id, blob_map): assert isinstance(manifest, ManifestInterface) + storage_map = {blob.digest: blob.id for blob_digest, blob in blob_map.iteritems()} (_, newly_created) = model.tag.store_tag_manifest_for_repo(repository.id, tag_name, manifest, - leaf_layer_id=leaf_layer_id) + leaf_layer_id, storage_map) return newly_created def repository_tags(self, namespace_name, repo_name, start_id, limit):