diff --git a/data/database.py b/data/database.py index 520a386e6..4a12e05c2 100644 --- a/data/database.py +++ b/data/database.py @@ -503,7 +503,8 @@ class User(BaseModel): TagManifest, AccessToken, OAuthAccessToken, BlobUpload, RepositoryNotification, OAuthAuthorizationCode, RepositoryActionCount, TagManifestLabel, - TeamSync, RepositorySearchScore, DeletedNamespace} | appr_classes + TeamSync, RepositorySearchScore, + DeletedNamespace} | appr_classes | v22_classes delete_instance_filtered(self, User, delete_nullable, skip_transitive_deletes) @@ -651,7 +652,7 @@ class Repository(BaseModel): # are cleaned up directly skip_transitive_deletes = {RepositoryTag, RepositoryBuild, RepositoryBuildTrigger, BlobUpload, Image, TagManifest, TagManifestLabel, Label, DerivedStorageForImage, - RepositorySearchScore} | appr_classes + RepositorySearchScore} | appr_classes | v22_classes delete_instance_filtered(self, Repository, delete_nullable, skip_transitive_deletes) @@ -898,12 +899,6 @@ class RepositoryTag(BaseModel): ) -class TagManifest(BaseModel): - tag = ForeignKeyField(RepositoryTag, unique=True) - digest = CharField(index=True) - json_data = TextField() - - class BUILD_PHASE(object): """ Build phases enum """ ERROR = 'error' @@ -1240,21 +1235,6 @@ class Label(BaseModel): source_type = ForeignKeyField(LabelSourceType) -class TagManifestLabel(BaseModel): - """ Mapping from a tag manifest to a label. - """ - repository = ForeignKeyField(Repository, index=True) - annotated = ForeignKeyField(TagManifest, index=True) - label = ForeignKeyField(Label) - - class Meta: - database = db - read_slaves = (read_slave,) - indexes = ( - (('annotated', 'label'), True), - ) - - class ApprBlob(BaseModel): """ ApprBlob represents a content-addressable object stored outside of the database. 
""" @@ -1385,10 +1365,104 @@ class AppSpecificAuthToken(BaseModel): indexes = ( (('user', 'expiration'), False), ) - + + +class Manifest(BaseModel): + """ Manifest represents a single manifest under a repository. Within a repository, + there can only be one manifest with the same digest. + """ + repository = ForeignKeyField(Repository) + digest = CharField(index=True) + media_type = EnumField(MediaType) + manifest_bytes = TextField() + + class Meta: + database = db + read_slaves = (read_slave,) + indexes = ( + (('repository', 'digest'), True), + (('repository', 'media_type'), False), + ) + + +class ManifestLabel(BaseModel): + """ ManifestLabel represents a label applied to a Manifest, within a repository. + Note that since Manifests are stored per-repository, the repository here is + a bit redundant, but we do so to make cleanup easier. + """ + repository = ForeignKeyField(Repository, index=True) + manifest = ForeignKeyField(Manifest) + label = ForeignKeyField(Label) + + class Meta: + database = db + read_slaves = (read_slave,) + indexes = ( + (('manifest', 'label'), True), + ) + + +class ManifestBlob(BaseModel): + """ ManifestBlob represents a blob that is used by a manifest. """ + repository = ForeignKeyField(Repository, index=True) + manifest = ForeignKeyField(Manifest) + blob = ForeignKeyField(ImageStorage) + blob_index = IntegerField() # 0-indexed location of the blob in the manifest. + + class Meta: + database = db + read_slaves = (read_slave,) + indexes = ( + (('manifest', 'blob'), True), + (('manifest', 'blob_index'), True), + ) + + +class ManifestLegacyImage(BaseModel): + """ For V1-compatible manifests only, this table maps from the manifest to its associated + Docker image. + """ + repository = ForeignKeyField(Repository, index=True) + manifest = ForeignKeyField(Manifest, unique=True) + image = ForeignKeyField(Image) + + +class TagManifest(BaseModel): + """ TO BE DEPRECATED: The manifest for a tag. 
""" + tag = ForeignKeyField(RepositoryTag, unique=True) + digest = CharField(index=True) + json_data = TextField() + + # Note: `manifest` will be back-filled by a worker and may not be present + # currently. + manifest = ForeignKeyField(Manifest, null=True, index=True) + broken = BooleanField(null=True, index=True) + +class TagManifestLabel(BaseModel): + """ TO BE DEPRECATED: Mapping from a tag manifest to a label. + """ + repository = ForeignKeyField(Repository, index=True) + annotated = ForeignKeyField(TagManifest, index=True) + label = ForeignKeyField(Label) + + # Note: `manifest_label` will be back-filled by a worker and may not be present + # currently. + manifest_label = ForeignKeyField(ManifestLabel, null=True, index=True) + broken_manifest = BooleanField(null=True, index=True) + + class Meta: + database = db + read_slaves = (read_slave,) + indexes = ( + (('annotated', 'label'), True), + ) + + appr_classes = set([ApprTag, ApprTagKind, ApprBlobPlacementLocation, ApprManifestList, ApprManifestBlob, ApprBlob, ApprManifestListManifest, ApprManifest, ApprBlobPlacement]) +v22_classes = set([Manifest, ManifestLabel, ManifestBlob, ManifestLegacyImage]) + is_model = lambda x: inspect.isclass(x) and issubclass(x, BaseModel) and x is not BaseModel all_models = [model[1] for model in inspect.getmembers(sys.modules[__name__], is_model)] diff --git a/data/migrations/versions/7734c7584421_add_v2_2_data_models_for_manifest_.py b/data/migrations/versions/7734c7584421_add_v2_2_data_models_for_manifest_.py new file mode 100644 index 000000000..6cbbbe076 --- /dev/null +++ b/data/migrations/versions/7734c7584421_add_v2_2_data_models_for_manifest_.py @@ -0,0 +1,153 @@ +"""Add V2_2 data models for Manifest, ManifestBlob and ManifestLegacyImage + +Revision ID: 7734c7584421 +Revises: 6c21e2cfb8b6 +Create Date: 2018-07-31 13:26:02.850353 + +""" + +# revision identifiers, used by Alembic. 
+revision = '7734c7584421' +down_revision = '6c21e2cfb8b6' + +from alembic import op +import sqlalchemy as sa +from image.docker.schema1 import DOCKER_SCHEMA1_CONTENT_TYPES + + +def upgrade(tables, tester): + # ### commands auto generated by Alembic - please adjust! ### + op.create_table('manifest', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('repository_id', sa.Integer(), nullable=False), + sa.Column('digest', sa.String(length=255), nullable=False), + sa.Column('media_type_id', sa.Integer(), nullable=False), + sa.Column('manifest_bytes', sa.Text(), nullable=False), + sa.ForeignKeyConstraint(['media_type_id'], ['mediatype.id'], name=op.f('fk_manifest_media_type_id_mediatype')), + sa.ForeignKeyConstraint(['repository_id'], ['repository.id'], name=op.f('fk_manifest_repository_id_repository')), + sa.PrimaryKeyConstraint('id', name=op.f('pk_manifest')) + ) + op.create_index('manifest_digest', 'manifest', ['digest'], unique=False) + op.create_index('manifest_media_type_id', 'manifest', ['media_type_id'], unique=False) + op.create_index('manifest_repository_id', 'manifest', ['repository_id'], unique=False) + op.create_index('manifest_repository_id_digest', 'manifest', ['repository_id', 'digest'], unique=True) + op.create_index('manifest_repository_id_media_type_id', 'manifest', ['repository_id', 'media_type_id'], unique=False) + op.create_table('manifestblob', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('repository_id', sa.Integer(), nullable=False), + sa.Column('manifest_id', sa.Integer(), nullable=False), + sa.Column('blob_id', sa.Integer(), nullable=False), + sa.Column('blob_index', sa.Integer(), nullable=False), + sa.ForeignKeyConstraint(['blob_id'], ['imagestorage.id'], name=op.f('fk_manifestblob_blob_id_imagestorage')), + sa.ForeignKeyConstraint(['manifest_id'], ['manifest.id'], name=op.f('fk_manifestblob_manifest_id_manifest')), + sa.ForeignKeyConstraint(['repository_id'], ['repository.id'], 
name=op.f('fk_manifestblob_repository_id_repository')), + sa.PrimaryKeyConstraint('id', name=op.f('pk_manifestblob')) + ) + op.create_index('manifestblob_blob_id', 'manifestblob', ['blob_id'], unique=False) + op.create_index('manifestblob_manifest_id', 'manifestblob', ['manifest_id'], unique=False) + op.create_index('manifestblob_manifest_id_blob_id', 'manifestblob', ['manifest_id', 'blob_id'], unique=True) + op.create_index('manifestblob_manifest_id_blob_index', 'manifestblob', ['manifest_id', 'blob_index'], unique=True) + op.create_index('manifestblob_repository_id', 'manifestblob', ['repository_id'], unique=False) + op.create_table('manifestlabel', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('repository_id', sa.Integer(), nullable=False), + sa.Column('manifest_id', sa.Integer(), nullable=False), + sa.Column('label_id', sa.Integer(), nullable=False), + sa.ForeignKeyConstraint(['label_id'], ['label.id'], name=op.f('fk_manifestlabel_label_id_label')), + sa.ForeignKeyConstraint(['manifest_id'], ['manifest.id'], name=op.f('fk_manifestlabel_manifest_id_manifest')), + sa.ForeignKeyConstraint(['repository_id'], ['repository.id'], name=op.f('fk_manifestlabel_repository_id_repository')), + sa.PrimaryKeyConstraint('id', name=op.f('pk_manifestlabel')) + ) + op.create_index('manifestlabel_label_id', 'manifestlabel', ['label_id'], unique=False) + op.create_index('manifestlabel_manifest_id', 'manifestlabel', ['manifest_id'], unique=False) + op.create_index('manifestlabel_manifest_id_label_id', 'manifestlabel', ['manifest_id', 'label_id'], unique=True) + op.create_index('manifestlabel_repository_id', 'manifestlabel', ['repository_id'], unique=False) + op.create_table('manifestlegacyimage', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('repository_id', sa.Integer(), nullable=False), + sa.Column('manifest_id', sa.Integer(), nullable=False), + sa.Column('image_id', sa.Integer(), nullable=False), + sa.ForeignKeyConstraint(['image_id'], ['image.id'], 
name=op.f('fk_manifestlegacyimage_image_id_image')), + sa.ForeignKeyConstraint(['manifest_id'], ['manifest.id'], name=op.f('fk_manifestlegacyimage_manifest_id_manifest')), + sa.ForeignKeyConstraint(['repository_id'], ['repository.id'], name=op.f('fk_manifestlegacyimage_repository_id_repository')), + sa.PrimaryKeyConstraint('id', name=op.f('pk_manifestlegacyimage')) + ) + op.create_index('manifestlegacyimage_image_id', 'manifestlegacyimage', ['image_id'], unique=False) + op.create_index('manifestlegacyimage_manifest_id', 'manifestlegacyimage', ['manifest_id'], unique=True) + op.create_index('manifestlegacyimage_repository_id', 'manifestlegacyimage', ['repository_id'], unique=False) + + op.add_column(u'tagmanifest', sa.Column('broken', sa.Boolean(), nullable=True)) + op.add_column(u'tagmanifest', sa.Column('manifest_id', sa.Integer(), nullable=True)) + op.create_index('tagmanifest_broken', 'tagmanifest', ['broken'], unique=False) + op.create_index('tagmanifest_manifest_id', 'tagmanifest', ['manifest_id'], unique=False) + op.create_foreign_key(op.f('fk_tagmanifest_manifest_id_manifest'), 'tagmanifest', 'manifest', ['manifest_id'], ['id']) + op.add_column(u'tagmanifestlabel', sa.Column('broken_manifest', sa.Boolean(), nullable=True)) + op.add_column(u'tagmanifestlabel', sa.Column('manifest_label_id', sa.Integer(), nullable=True)) + op.create_index('tagmanifestlabel_broken_manifest', 'tagmanifestlabel', ['broken_manifest'], unique=False) + op.create_index('tagmanifestlabel_manifest_label_id', 'tagmanifestlabel', ['manifest_label_id'], unique=False) + op.create_foreign_key(op.f('fk_tagmanifestlabel_manifest_label_id_manifestlabel'), 'tagmanifestlabel', 'manifestlabel', ['manifest_label_id'], ['id']) + # ### end Alembic commands ### + + for media_type in DOCKER_SCHEMA1_CONTENT_TYPES: + op.bulk_insert(tables.mediatype, + [ + {'name': media_type}, + ]) + + # ### population of test data ### # + tester.populate_table('manifest', [ + ('digest', tester.TestDataType.String), + 
('manifest_bytes', tester.TestDataType.JSON), + ('media_type_id', tester.TestDataType.Foreign('mediatype')), + ('repository_id', tester.TestDataType.Foreign('repository')), + ]) + + tester.populate_table('manifestblob', [ + ('manifest_id', tester.TestDataType.Foreign('manifest')), + ('repository_id', tester.TestDataType.Foreign('repository')), + ('blob_id', tester.TestDataType.Foreign('imagestorage')), + ('blob_index', tester.TestDataType.Integer), + ]) + + tester.populate_table('manifestlabel', [ + ('manifest_id', tester.TestDataType.Foreign('manifest')), + ('label_id', tester.TestDataType.Foreign('label')), + ('repository_id', tester.TestDataType.Foreign('repository')), + ]) + + tester.populate_table('manifestlegacyimage', [ + ('manifest_id', tester.TestDataType.Foreign('manifest')), + ('image_id', tester.TestDataType.Foreign('image')), + ('repository_id', tester.TestDataType.Foreign('repository')), + ]) + + tester.populate_column('tagmanifest', 'manifest_id', tester.TestDataType.Foreign('manifest')) + tester.populate_column('tagmanifestlabel', 'manifest_label_id', tester.TestDataType.Foreign('manifestlabel')) + # ### end population of test data ### # + + +def downgrade(tables, tester): + for media_type in DOCKER_SCHEMA1_CONTENT_TYPES: + op.execute(tables + .mediatype + .delete() + .where(tables. + mediatype.c.name == op.inline_literal(media_type))) + + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_constraint(op.f('fk_tagmanifestlabel_manifest_label_id_manifestlabel'), 'tagmanifestlabel', type_='foreignkey') + op.drop_index('tagmanifestlabel_manifest_label_id', table_name='tagmanifestlabel') + op.drop_index('tagmanifestlabel_broken_manifest', table_name='tagmanifestlabel') + op.drop_column(u'tagmanifestlabel', 'manifest_label_id') + op.drop_column(u'tagmanifestlabel', 'broken_manifest') + op.drop_constraint(op.f('fk_tagmanifest_manifest_id_manifest'), 'tagmanifest', type_='foreignkey') + op.drop_index('tagmanifest_manifest_id', table_name='tagmanifest') + op.drop_index('tagmanifest_broken', table_name='tagmanifest') + op.drop_column(u'tagmanifest', 'manifest_id') + op.drop_column(u'tagmanifest', 'broken') + + op.drop_table('manifestlegacyimage') + op.drop_table('manifestlabel') + op.drop_table('manifestblob') + op.drop_table('manifest') + # ### end Alembic commands ### diff --git a/data/model/label.py b/data/model/label.py index aef0a9449..e2fe6ee21 100644 --- a/data/model/label.py +++ b/data/model/label.py @@ -2,7 +2,8 @@ import logging from cachetools import lru_cache -from data.database import Label, TagManifestLabel, MediaType, LabelSourceType, db_transaction +from data.database import (Label, TagManifestLabel, MediaType, LabelSourceType, db_transaction, + ManifestLabel) from data.model import InvalidLabelKeyException, InvalidMediaTypeException, DataModelException from data.text import prefix_search from util.validation import validate_label_key @@ -70,6 +71,9 @@ def create_manifest_label(tag_manifest, key, value, source_type_name, media_type label = Label.create(key=key, value=value, source_type=source_type_id, media_type=media_type_id) TagManifestLabel.create(annotated=tag_manifest, label=label, repository=tag_manifest.tag.repository) + if tag_manifest.manifest is not None: + ManifestLabel.create(manifest=tag_manifest.manifest, label=label, + repository=tag_manifest.tag.repository) return label @@ -115,11 +119,14 @@ def 
delete_manifest_label(label_uuid, tag_manifest): if not label.source_type.mutable: raise DataModelException('Cannot delete immutable label') - # Delete the mapping record and label. + # Delete the mapping records and label. deleted_count = TagManifestLabel.delete().where(TagManifestLabel.label == label).execute() if deleted_count != 1: logger.warning('More than a single label deleted for matching label %s', label_uuid) + deleted_count = ManifestLabel.delete().where(ManifestLabel.label == label).execute() + if deleted_count != 1: + logger.warning('More than a single label deleted for matching label %s', label_uuid) + label.delete_instance(recursive=False) return label - diff --git a/data/model/repository.py b/data/model/repository.py index 1bba795c8..6bd833a5b 100644 --- a/data/model/repository.py +++ b/data/model/repository.py @@ -10,9 +10,10 @@ from data.model import ( config, DataModelException, tag, db_transaction, storage, permission, _basequery) from data.database import ( Repository, Namespace, RepositoryTag, Star, Image, ImageStorage, User, Visibility, - RepositoryPermission, RepositoryActionCount, Role, RepositoryAuthorizedEmail, TagManifest, - DerivedStorageForImage, Label, TagManifestLabel, db_for_update, get_epoch_timestamp, - db_random_func, db_concat_func, RepositorySearchScore, RepositoryKind, ApprTag) + RepositoryPermission, RepositoryActionCount, Role, RepositoryAuthorizedEmail, + DerivedStorageForImage, Label, db_for_update, get_epoch_timestamp, + db_random_func, db_concat_func, RepositorySearchScore, RepositoryKind, ApprTag, + ManifestLegacyImage, Manifest) from data.text import prefix_search from util.itertoolrecipes import take @@ -275,6 +276,13 @@ def garbage_collect_repo(repo, extra_candidate_set=None, is_purge=False): logger.info('Could not GC derived images %s; will try again soon', image_ids_to_remove) return False + # Delete any legacy references to the images. 
+ (ManifestLegacyImage + .delete() + .where(ManifestLegacyImage.image << image_ids_to_remove) + .execute()) + + # Delete the images themselves. try: Image.delete().where(Image.id << image_ids_to_remove).execute() except IntegrityError: diff --git a/data/model/storage.py b/data/model/storage.py index 58d5ce351..8ec237406 100644 --- a/data/model/storage.py +++ b/data/model/storage.py @@ -9,7 +9,7 @@ from data.model import (config, db_transaction, InvalidImageException, TorrentIn from data.database import (ImageStorage, Image, ImageStoragePlacement, ImageStorageLocation, ImageStorageTransformation, ImageStorageSignature, ImageStorageSignatureKind, Repository, Namespace, TorrentInfo, ApprBlob, - ensure_under_transaction) + ensure_under_transaction, ManifestBlob) logger = logging.getLogger(__name__) @@ -161,6 +161,12 @@ def garbage_collect_storage(storage_id_whitelist): .execute()) logger.debug('Removed %s image storage signatures', signatures_removed) + blob_refs_removed = (ManifestBlob + .delete() + .where(ManifestBlob.blob << orphaned_storage_ids) + .execute()) + logger.debug('Removed %s blob references', blob_refs_removed) + storages_removed = (ImageStorage .delete() .where(ImageStorage.id << orphaned_storage_ids) diff --git a/data/model/tag.py b/data/model/tag.py index b9939740f..9c653d0d1 100644 --- a/data/model/tag.py +++ b/data/model/tag.py @@ -1,5 +1,4 @@ import logging -import time from calendar import timegm from uuid import uuid4 @@ -10,7 +9,8 @@ from data.model import (image, db_transaction, DataModelException, _basequery, config) from data.database import (RepositoryTag, Repository, Image, ImageStorage, Namespace, TagManifest, RepositoryNotification, Label, TagManifestLabel, get_epoch_timestamp, - db_for_update) + db_for_update, Manifest, ManifestLabel, ManifestBlob, + ManifestLegacyImage) from util.timedeltastring import convert_to_timedelta @@ -352,44 +352,64 @@ def _delete_tags(repo, query_modifier=None): return set() with db_transaction(): - 
manifests_to_delete = list(TagManifest - .select(TagManifest.id) - .join(RepositoryTag) - .where(RepositoryTag.id << tags_to_delete)) + # TODO(jschorr): Update to not use TagManifest once that table has been deprecated. + tag_manifests_to_delete = list(TagManifest + .select() + .join(RepositoryTag) + .where(RepositoryTag.id << tags_to_delete)) + tag_manifest_ids_to_delete = [tagmanifest.id for tagmanifest in tag_manifests_to_delete] + manifest_ids_to_delete = [tagmanifest.manifest_id for tagmanifest in tag_manifests_to_delete + if tagmanifest.manifest is not None] num_deleted_manifests = 0 - if len(manifests_to_delete) > 0: + if len(tag_manifest_ids_to_delete) > 0: # Find the set of IDs for all the labels to delete. manifest_labels_query = (TagManifestLabel .select() .where(TagManifestLabel.repository == repo, - TagManifestLabel.annotated << manifests_to_delete)) - + TagManifestLabel.annotated << tag_manifest_ids_to_delete)) label_ids = [manifest_label.label_id for manifest_label in manifest_labels_query] - if label_ids: - # Delete all the mapping entries. - (TagManifestLabel + + # Delete all the mapping entries for labels. + (TagManifestLabel + .delete() + .where(TagManifestLabel.repository == repo, + TagManifestLabel.annotated << tag_manifest_ids_to_delete) + .execute()) + + if manifest_ids_to_delete: + (ManifestLabel .delete() - .where(TagManifestLabel.repository == repo, - TagManifestLabel.annotated << manifests_to_delete) + .where(ManifestLabel.manifest << manifest_ids_to_delete) .execute()) + # Delete the labels themselves. + if label_ids: # Delete all the matching labels. Label.delete().where(Label.id << label_ids).execute() - + # Delete the old-style manifests. num_deleted_manifests = (TagManifest .delete() - .where(TagManifest.id << manifests_to_delete) + .where(TagManifest.id << tag_manifest_ids_to_delete) .execute()) + # Delete the new-style manifests, if any. 
+ if manifest_ids_to_delete: + (ManifestLegacyImage + .delete() + .where(ManifestLegacyImage.manifest << manifest_ids_to_delete) + .execute()) + + ManifestBlob.delete().where(ManifestBlob.manifest << manifest_ids_to_delete).execute() + Manifest.delete().where(Manifest.id << manifest_ids_to_delete).execute() + num_deleted_tags = (RepositoryTag .delete() .where(RepositoryTag.id << tags_to_delete) .execute()) logger.debug('Removed %s tags with %s manifests', num_deleted_tags, num_deleted_manifests) - ancestors = reduce(lambda r, l: r | l, (set(tag.image.ancestor_id_list()) for tag in tags_to_delete)) direct_referenced = {tag.image.id for tag in tags_to_delete} @@ -459,14 +479,14 @@ def restore_tag_to_manifest(repo_obj, tag_name, manifest_digest): # Verify that the manifest digest already existed under this repository under the # tag. try: - manifest = (TagManifest - .select(TagManifest, RepositoryTag, Image) - .join(RepositoryTag) - .join(Image) - .where(RepositoryTag.repository == repo_obj) - .where(RepositoryTag.name == tag_name) - .where(TagManifest.digest == manifest_digest) - .get()) + tag_manifest = (TagManifest + .select(TagManifest, RepositoryTag, Image) + .join(RepositoryTag) + .join(Image) + .where(RepositoryTag.repository == repo_obj) + .where(RepositoryTag.name == tag_name) + .where(TagManifest.digest == manifest_digest) + .get()) except TagManifest.DoesNotExist: raise DataModelException('Cannot restore to unknown or invalid digest') @@ -476,9 +496,12 @@ def restore_tag_to_manifest(repo_obj, tag_name, manifest_digest): except DataModelException: existing_image = None - docker_image_id = manifest.tag.image.docker_image_id - store_tag_manifest(repo_obj.namespace_user.username, repo_obj.name, tag_name, docker_image_id, - manifest_digest, manifest.json_data, reversion=True) + # Change the tag manifest to point to the updated image. 
+ docker_image_id = tag_manifest.tag.image.docker_image_id + updated_tag = create_or_update_tag_for_repo(repo_obj.id, tag_name, docker_image_id, + reversion=True) + tag_manifest.tag = updated_tag + tag_manifest.save() return existing_image @@ -509,8 +532,8 @@ def restore_tag_to_image(repo_obj, tag_name, docker_image_id): return existing_image -def store_tag_manifest(namespace_name, repository_name, tag_name, docker_image_id, manifest_digest, - manifest_data, reversion=False): +def store_tag_manifest(namespace_name, repository_name, tag_name, manifest, leaf_layer_id=None, + reversion=False): """ Stores a tag manifest for a specific tag name in the database. Returns the TagManifest object, as well as a boolean indicating whether the TagManifest was created. """ @@ -519,25 +542,27 @@ def store_tag_manifest(namespace_name, repository_name, tag_name, docker_image_i except Repository.DoesNotExist: raise DataModelException('Invalid repository %s/%s' % (namespace_name, repository_name)) - return store_tag_manifest_for_repo(repo.id, tag_name, docker_image_id, manifest_digest, - manifest_data, reversion=False) + return store_tag_manifest_for_repo(repo.id, tag_name, manifest, leaf_layer_id=leaf_layer_id, + reversion=False) -def store_tag_manifest_for_repo(repository_id, tag_name, docker_image_id, manifest_digest, - manifest_data, reversion=False): + +def store_tag_manifest_for_repo(repository_id, tag_name, manifest, leaf_layer_id=None, + reversion=False): """ Stores a tag manifest for a specific tag name in the database. Returns the TagManifest object, as well as a boolean indicating whether the TagManifest was created. 
""" + docker_image_id = leaf_layer_id or manifest.leaf_layer_v1_image_id with db_transaction(): tag = create_or_update_tag_for_repo(repository_id, tag_name, docker_image_id, reversion=reversion) try: - manifest = TagManifest.get(digest=manifest_digest) + manifest = TagManifest.get(digest=manifest.digest) manifest.tag = tag manifest.save() return manifest, False except TagManifest.DoesNotExist: - return TagManifest.create(tag=tag, digest=manifest_digest, json_data=manifest_data), True + return _create_manifest(tag, manifest), True def get_active_tag(namespace, repo_name, tag_name): @@ -558,10 +583,33 @@ def get_possibly_expired_tag(namespace, repo_name, tag_name): Namespace.username == namespace)).get() -def associate_generated_tag_manifest(namespace, repo_name, tag_name, manifest_digest, - manifest_data): +def associate_generated_tag_manifest(namespace, repo_name, tag_name, manifest): tag = get_active_tag(namespace, repo_name, tag_name) - return TagManifest.create(tag=tag, digest=manifest_digest, json_data=manifest_data) + return _create_manifest(tag, manifest) + + +def _create_manifest(tag, manifest): + # Lookup all blobs in the manifest. 
+ blobs = ImageStorage.select().where(ImageStorage.content_checksum << list(manifest.blob_digests)) + blob_map = {} + for blob in blobs: + blob_map[blob.content_checksum] = blob + + with db_transaction(): + media_type = Manifest.media_type.get_id(manifest.media_type) + manifest_row = Manifest.create(digest=manifest.digest, repository=tag.repository, + manifest_bytes=manifest.bytes, media_type=media_type) + ManifestLegacyImage.create(manifest=manifest_row, repository=tag.repository, image=tag.image) + for index, blob_digest in enumerate(reversed(manifest.blob_digests)): + image_storage = blob_map.get(blob_digest) + if image_storage is None: + raise DataModelException('Missing blob for manifest') + + ManifestBlob.create(manifest=manifest_row, repository=tag.repository, blob=image_storage, + blob_index=index) + + return TagManifest.create(tag=tag, digest=manifest.digest, json_data=manifest.bytes, + manifest=manifest_row) def load_tag_manifest(namespace, repo_name, tag_name): diff --git a/data/model/test/test_basequery.py b/data/model/test/test_basequery.py index d3c18a849..84e248327 100644 --- a/data/model/test/test_basequery.py +++ b/data/model/test/test_basequery.py @@ -89,6 +89,9 @@ def test_filter_repositories(username, include_public, filter_to_namespace, repo .switch(Repository) .join(RepositoryPermission, JOIN.LEFT_OUTER)) + # Prime the cache. 
+ Repository.kind.get_id('image') + with assert_query_count(1): found = list(filter_to_repos_for_user(query, user.id, namespace=namespace, diff --git a/data/model/test/test_gc.py b/data/model/test/test_gc.py index d8faa3b16..5ff79a2e4 100644 --- a/data/model/test/test_gc.py +++ b/data/model/test/test_gc.py @@ -4,13 +4,15 @@ import time from mock import patch -from app import storage +from app import storage, docker_v2_signing_key + from contextlib import contextmanager from playhouse.test_utils import assert_query_count from data import model, database from data.database import (Image, ImageStorage, DerivedStorageForImage, Label, TagManifestLabel, - ApprBlob) + ApprBlob, Manifest, TagManifest) +from image.docker.schema1 import DockerSchema1ManifestBuilder from test.fixtures import * @@ -61,6 +63,20 @@ def create_image(docker_image_id, repository_obj, username): return image.storage +def store_tag_manifest(namespace, repo_name, tag_name, image_id): + builder = DockerSchema1ManifestBuilder(namespace, repo_name, tag_name) + try: + image_storage = ImageStorage.select().where(~(ImageStorage.content_checksum >> None)).get() + builder.add_layer(image_storage.content_checksum, '{"id": "foo"}') + except ImageStorage.DoesNotExist: + pass + + manifest = builder.build(docker_v2_signing_key) + manifest_row, _ = model.tag.store_tag_manifest(namespace, repo_name, tag_name, manifest, + leaf_layer_id=image_id) + return manifest_row + + def create_repository(namespace=ADMIN_ACCESS_USER, name=REPO, **kwargs): user = model.user.get_user(namespace) repo = model.repository.create_repository(namespace, name, user) @@ -86,8 +102,7 @@ def create_repository(namespace=ADMIN_ACCESS_USER, name=REPO, **kwargs): parent=parent) # Set the tag for the image. - tag_manifest, _ = model.tag.store_tag_manifest(namespace, name, tag_name, image_ids[-1], - 'sha:someshahere', '{}') + tag_manifest = store_tag_manifest(namespace, name, tag_name, image_ids[-1]) # Add some labels to the tag. 
model.label.create_manifest_label(tag_manifest, 'foo', 'bar', 'manifest') @@ -145,6 +160,13 @@ def _get_dangling_label_count(): return len(label_ids - referenced_by_manifest) +def _get_dangling_manifest_count(): + manifest_ids = set([current.id for current in Manifest.select()]) + referenced_by_tag_manifest = set([manifest.manifest_id for manifest in TagManifest.select()]) + return len(manifest_ids - referenced_by_tag_manifest) + + + @contextmanager def assert_gc_integrity(expect_storage_removed=True): """ Specialized assertion for ensuring that GC cleans up all dangling storages @@ -158,15 +180,19 @@ def assert_gc_integrity(expect_storage_removed=True): # Store the number of dangling storages and labels. existing_storage_count = _get_dangling_storage_count() existing_label_count = _get_dangling_label_count() + existing_manifest_count = _get_dangling_manifest_count() yield - # Ensure the number of dangling storages and labels has not changed. + # Ensure the number of dangling storages, manifests and labels has not changed. updated_storage_count = _get_dangling_storage_count() assert updated_storage_count == existing_storage_count updated_label_count = _get_dangling_label_count() assert updated_label_count == existing_label_count + updated_manifest_count = _get_dangling_manifest_count() + assert updated_manifest_count == existing_manifest_count + # Ensure that for each call to the image+storage cleanup callback, the image and its # storage is not found *anywhere* in the database. 
for removed_image_and_storage in removed_image_storages: @@ -466,13 +492,11 @@ def test_images_shared_storage(default_tag_policy, initialized_db): repository=repository, storage=image_storage, ancestors='/') - model.tag.store_tag_manifest(repository.namespace_user.username, repository.name, - 'first', first_image.docker_image_id, - 'sha:someshahere', '{}') + store_tag_manifest(repository.namespace_user.username, repository.name, + 'first', first_image.docker_image_id) - model.tag.store_tag_manifest(repository.namespace_user.username, repository.name, - 'second', second_image.docker_image_id, - 'sha:someshahere', '{}') + store_tag_manifest(repository.namespace_user.username, repository.name, + 'second', second_image.docker_image_id) # Delete the first tag. delete_tag(repository, 'first') @@ -505,9 +529,8 @@ def test_image_with_cas(default_tag_policy, initialized_db): repository=repository, storage=image_storage, ancestors='/') - model.tag.store_tag_manifest(repository.namespace_user.username, repository.name, - 'first', first_image.docker_image_id, - 'sha:someshahere1', '{}') + store_tag_manifest(repository.namespace_user.username, repository.name, + 'first', first_image.docker_image_id) assert_not_deleted(repository, 'i1') @@ -553,13 +576,11 @@ def test_images_shared_cas(default_tag_policy, initialized_db): repository=repository, storage=is2, ancestors='/') - model.tag.store_tag_manifest(repository.namespace_user.username, repository.name, - 'first', first_image.docker_image_id, - 'sha:someshahere1', '{}') + store_tag_manifest(repository.namespace_user.username, repository.name, + 'first', first_image.docker_image_id) - model.tag.store_tag_manifest(repository.namespace_user.username, repository.name, - 'second', second_image.docker_image_id, - 'sha:someshahere2', '{}') + store_tag_manifest(repository.namespace_user.username, repository.name, + 'second', second_image.docker_image_id) assert_not_deleted(repository, 'i1', 'i2') @@ -602,9 +623,8 @@ def 
test_images_shared_cas_with_new_blob_table(default_tag_policy, initialized_d repository=repository, storage=is1, ancestors='/') - model.tag.store_tag_manifest(repository.namespace_user.username, repository.name, - 'first', first_image.docker_image_id, - 'sha:someshahere1', '{}') + store_tag_manifest(repository.namespace_user.username, repository.name, + 'first', first_image.docker_image_id) assert_not_deleted(repository, 'i1') diff --git a/data/model/test/test_tag.py b/data/model/test/test_tag.py index b9ab2e3b2..4cdb94acb 100644 --- a/data/model/test/test_tag.py +++ b/data/model/test/test_tag.py @@ -1,19 +1,26 @@ -import pytest +import json from datetime import datetime -from mock import patch from time import time -from data.database import Image, RepositoryTag, ImageStorage, Repository +import pytest + +from mock import patch + +from app import docker_v2_signing_key +from data.database import (Image, RepositoryTag, ImageStorage, Repository, Manifest, ManifestBlob, + ManifestLegacyImage) from data.model.repository import create_repository from data.model.tag import (list_active_repo_tags, create_or_update_tag, delete_tag, get_matching_tags, _tag_alive, get_matching_tags_for_images, - change_tag_expiration, get_active_tag) + change_tag_expiration, get_active_tag, store_tag_manifest) from data.model.image import find_create_or_link_image +from image.docker.schema1 import DockerSchema1ManifestBuilder from util.timedeltastring import convert_to_timedelta from test.fixtures import * + def _get_expected_tags(image): expected_query = (RepositoryTag .select() @@ -211,3 +218,31 @@ def test_change_tag_expiration(expiration_offset, expected_offset, initialized_d end_date = datetime.utcfromtimestamp(footag_updated.lifetime_end_ts) expected_end_date = start_date + convert_to_timedelta(expected_offset) assert (expected_end_date - end_date).total_seconds() < 5 # variance in test + + +def test_store_tag_manifest(initialized_db): + # Create a manifest with some layers. 
+ builder = DockerSchema1ManifestBuilder('devtable', 'simple', 'sometag') + + storages = list(ImageStorage.select().where(~(ImageStorage.content_checksum >> None)).limit(10)) + assert storages + + repo = model.repository.get_repository('devtable', 'simple') + for index, storage in enumerate(storages): + image_id = 'someimage%s' % index + builder.add_layer(storage.content_checksum, json.dumps({'id': image_id})) + find_create_or_link_image(image_id, repo, 'devtable', {}, 'local_us') + + manifest = builder.build(docker_v2_signing_key) + tag_manifest, _ = store_tag_manifest('devtable', 'simple', 'sometag', manifest) + + # Ensure we have the new-model expected rows. + assert tag_manifest.manifest is not None + assert tag_manifest.manifest.manifest_bytes == manifest.bytes + assert tag_manifest.manifest.digest == str(manifest.digest) + + blob_rows = {m.blob_id for m in + ManifestBlob.select().where(ManifestBlob.manifest == tag_manifest.manifest)} + assert blob_rows == {s.id for s in storages} + + assert ManifestLegacyImage.get(manifest=tag_manifest.manifest).image == tag_manifest.tag.image diff --git a/endpoints/v2/manifest.py b/endpoints/v2/manifest.py index d298c5240..468a64cdf 100644 --- a/endpoints/v2/manifest.py +++ b/endpoints/v2/manifest.py @@ -191,8 +191,7 @@ def _write_manifest(namespace_name, repo_name, manifest): # Store the manifest pointing to the tag. leaf_layer_id = rewritten_images[-1].image_id - newly_created = model.save_manifest(repo, manifest.tag, leaf_layer_id, manifest.digest, - manifest.bytes) + newly_created = model.save_manifest(repo, manifest.tag, manifest, leaf_layer_id) if newly_created: # TODO: make this batch labels = [] @@ -279,6 +278,5 @@ def _generate_and_store_manifest(namespace_name, repo_name, tag_name): manifest = builder.build(docker_v2_signing_key) # Write the manifest to the DB. 
- model.create_manifest_and_update_tag(namespace_name, repo_name, tag_name, manifest.digest, - manifest.bytes) + model.create_manifest_and_update_tag(namespace_name, repo_name, tag_name, manifest) return manifest diff --git a/endpoints/v2/models_interface.py b/endpoints/v2/models_interface.py index da67c96ac..7ba41a504 100644 --- a/endpoints/v2/models_interface.py +++ b/endpoints/v2/models_interface.py @@ -138,11 +138,10 @@ class DockerRegistryV2DataInterface(object): pass @abstractmethod - def create_manifest_and_update_tag(self, namespace_name, repo_name, tag_name, manifest_digest, - manifest_bytes): + def create_manifest_and_update_tag(self, namespace_name, repo_name, tag_name, manifest): """ - Creates a new manifest with the given digest and byte data, and assigns the tag with the given - name under the matching repository to it. + Creates a new manifest and assigns the tag with the given name under the matching repository to + it. """ pass @@ -156,11 +155,10 @@ class DockerRegistryV2DataInterface(object): pass @abstractmethod - def save_manifest(self, repository, tag_name, leaf_layer_docker_id, manifest_digest, - manifest_bytes): + def save_manifest(self, repository, tag_name, manifest, leaf_layer_id=None): """ - Saves a manifest pointing to the given leaf image, with the given manifest, under the matching - repository as a tag with the given name. + Saves a manifest, under the matching repository as a tag with the given name. If given, + leaf_layer_id is the ID of the leaf image the manifest points to. - Returns a boolean whether or not the tag was newly created or not. + Returns a boolean indicating whether the tag was newly created. 
""" diff --git a/endpoints/v2/models_pre_oci.py b/endpoints/v2/models_pre_oci.py index 2b86f43a2..81732052c 100644 --- a/endpoints/v2/models_pre_oci.py +++ b/endpoints/v2/models_pre_oci.py @@ -11,9 +11,10 @@ from endpoints.v2.models_interface import ( RepositoryReference, Tag,) from image.docker.v1 import DockerV1Metadata +from image.docker.interfaces import ManifestInterface +from image.docker.schema1 import DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE -_MEDIA_TYPE = "application/vnd.docker.distribution.manifest.v1+prettyjws" - +_MEDIA_TYPE = DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE class PreOCIModel(DockerRegistryV2DataInterface): """ @@ -90,11 +91,10 @@ class PreOCIModel(DockerRegistryV2DataInterface): parents = model.image.get_parent_images(namespace_name, repo_name, repo_image) return [_docker_v1_metadata(namespace_name, repo_name, image) for image in parents] - def create_manifest_and_update_tag(self, namespace_name, repo_name, tag_name, manifest_digest, - manifest_bytes): + def create_manifest_and_update_tag(self, namespace_name, repo_name, tag_name, manifest): + assert isinstance(manifest, ManifestInterface) try: - model.tag.associate_generated_tag_manifest(namespace_name, repo_name, tag_name, - manifest_digest, manifest_bytes) + model.tag.associate_generated_tag_manifest(namespace_name, repo_name, tag_name, manifest) except IntegrityError: # It's already there! 
pass @@ -112,10 +112,10 @@ class PreOCIModel(DockerRegistryV2DataInterface): parent_image) return _docker_v1_metadata(repository.namespace_name, repository.name, repo_image) - def save_manifest(self, repository, tag_name, leaf_layer_docker_id, manifest_digest, - manifest_bytes): - (_, newly_created) = model.tag.store_tag_manifest_for_repo( - repository.id, tag_name, leaf_layer_docker_id, manifest_digest, manifest_bytes) + def save_manifest(self, repository, tag_name, manifest, leaf_layer_id=None): + assert isinstance(manifest, ManifestInterface) + (_, newly_created) = model.tag.store_tag_manifest_for_repo(repository.id, tag_name, manifest, + leaf_layer_id=leaf_layer_id) return newly_created def repository_tags(self, namespace_name, repo_name, start_id, limit): diff --git a/initdb.py b/initdb.py index bda65953f..047ae1356 100644 --- a/initdb.py +++ b/initdb.py @@ -20,12 +20,13 @@ from data.database import (db, all_models, Role, TeamRole, Visibility, LoginServ QuayRegion, QuayService, UserRegion, OAuthAuthorizationCode, ServiceKeyApprovalType, MediaType, LabelSourceType, UserPromptKind, RepositoryKind, User, DisableReason, DeletedNamespace, appr_classes, - ApprTagKind, ApprBlobPlacementLocation) + ApprTagKind, ApprBlobPlacementLocation, Repository) from data import model from data.queue import WorkQueue from app import app, storage as store, tf from storage.basestorage import StoragePaths from endpoints.v2.manifest import _generate_and_store_manifest +from image.docker.schema1 import DOCKER_SCHEMA1_CONTENT_TYPES from workers import repositoryactioncounter @@ -243,6 +244,9 @@ def setup_database_for_testing(testcase, with_storage=False, force_rebuild=False db_initialized_for_testing.set() + # Initialize caches. + Repository.kind.get_id('image') + # Create a savepoint for the testcase. 
testcases[testcase] = {} testcases[testcase]['transaction'] = db.transaction() @@ -423,6 +427,9 @@ def initialize_database(): MediaType.create(name='application/vnd.cnr.manifests.v0.json') MediaType.create(name='application/vnd.cnr.manifest.list.v0.json') + for media_type in DOCKER_SCHEMA1_CONTENT_TYPES: + MediaType.create(name=media_type) + LabelSourceType.create(name='manifest') LabelSourceType.create(name='api', mutable=True) LabelSourceType.create(name='internal')