From fdcb8bad23b997760f669f36518e9c587ce097d5 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Mon, 5 Nov 2018 13:03:08 -0500 Subject: [PATCH] Implement the new OCI-based registry data model Note that this change does *not* enable the new data model by default, but does allow it to be used when a special environment variable is specified. --- data/database.py | 2 +- data/model/oci/__init__.py | 8 + data/model/oci/label.py | 126 ++++++ data/model/oci/manifest.py | 134 ++++++ data/model/oci/shared.py | 24 + data/model/oci/tag.py | 372 ++++++++++++++++ data/model/oci/test/test_oci_label.py | 87 ++++ data/model/oci/test/test_oci_manifest.py | 83 ++++ data/model/oci/test/test_oci_tag.py | 253 +++++++++++ data/model/tag.py | 9 +- data/registry_model/__init__.py | 11 +- data/registry_model/datatypes.py | 44 +- data/registry_model/registry_oci_model.py | 413 ++++++++++++++++++ data/registry_model/registry_pre_oci_model.py | 130 +----- data/registry_model/shared.py | 133 +++++- data/registry_model/test/test_interface.py | 134 +++--- .../test/test_manifestbuilder.py | 19 +- endpoints/api/tag.py | 1 + endpoints/v2/test/test_manifest.py | 2 +- initdb.py | 31 +- test/registry/protocol_fixtures.py | 14 + test/registry/registry_tests.py | 24 + test/test_api_usage.py | 2 +- 23 files changed, 1847 insertions(+), 209 deletions(-) create mode 100644 data/model/oci/__init__.py create mode 100644 data/model/oci/label.py create mode 100644 data/model/oci/manifest.py create mode 100644 data/model/oci/shared.py create mode 100644 data/model/oci/tag.py create mode 100644 data/model/oci/test/test_oci_label.py create mode 100644 data/model/oci/test/test_oci_manifest.py create mode 100644 data/model/oci/test/test_oci_tag.py create mode 100644 data/registry_model/registry_oci_model.py diff --git a/data/database.py b/data/database.py index 6ea7dd473..2649c048b 100644 --- a/data/database.py +++ b/data/database.py @@ -1232,7 +1232,7 @@ class Label(BaseModel): key = CharField(index=True) value = 
TextField() media_type = EnumField(MediaType) - source_type = ForeignKeyField(LabelSourceType) + source_type = EnumField(LabelSourceType) class ApprBlob(BaseModel): diff --git a/data/model/oci/__init__.py b/data/model/oci/__init__.py new file mode 100644 index 000000000..2bdea0ae3 --- /dev/null +++ b/data/model/oci/__init__.py @@ -0,0 +1,8 @@ +# There MUST NOT be any circular dependencies between these subsections. If there are fix it by +# moving the minimal number of things to shared +from data.model.oci import ( + label, + manifest, + shared, + tag, +) diff --git a/data/model/oci/label.py b/data/model/oci/label.py new file mode 100644 index 000000000..3300c4e86 --- /dev/null +++ b/data/model/oci/label.py @@ -0,0 +1,126 @@ +import logging + + +from data.model import InvalidLabelKeyException, InvalidMediaTypeException, DataModelException +from data.database import (Label, Manifest, TagManifestLabel, MediaType, LabelSourceType, + db_transaction, ManifestLabel, TagManifestLabelMap, + TagManifestToManifest) +from data.text import prefix_search +from util.validation import validate_label_key +from util.validation import is_json + +logger = logging.getLogger(__name__) + +def list_manifest_labels(manifest_id, prefix_filter=None): + """ Lists all labels found on the given manifest, with an optional filter by key prefix. """ + query = (Label + .select(Label, MediaType) + .join(MediaType) + .switch(Label) + .join(LabelSourceType) + .switch(Label) + .join(ManifestLabel) + .where(ManifestLabel.manifest == manifest_id)) + + if prefix_filter is not None: + query = query.where(prefix_search(Label.key, prefix_filter)) + + return query + + +def get_manifest_label(label_uuid, manifest): + """ Retrieves the manifest label on the manifest with the given UUID or None if none. 
""" + try: + return (Label + .select(Label, LabelSourceType) + .join(LabelSourceType) + .where(Label.uuid == label_uuid) + .switch(Label) + .join(ManifestLabel) + .where(ManifestLabel.manifest == manifest) + .get()) + except Label.DoesNotExist: + return None + + +def create_manifest_label(manifest_id, key, value, source_type_name, media_type_name=None): + """ Creates a new manifest label on a specific tag manifest. """ + if not key: + raise InvalidLabelKeyException() + + # Note that we don't prevent invalid label names coming from the manifest to be stored, as Docker + # does not currently prevent them from being put into said manifests. + if not validate_label_key(key) and source_type_name != 'manifest': + raise InvalidLabelKeyException('Key `%s` is invalid' % key) + + # Find the matching media type. If none specified, we infer. + if media_type_name is None: + media_type_name = 'text/plain' + if is_json(value): + media_type_name = 'application/json' + + try: + media_type_id = Label.media_type.get_id(media_type_name) + except MediaType.DoesNotExist: + raise InvalidMediaTypeException() + + source_type_id = Label.source_type.get_id(source_type_name) + + # Ensure the manifest exists. + try: + manifest = Manifest.get(id=manifest_id) + except Manifest.DoesNotExist: + return None + + with db_transaction(): + label = Label.create(key=key, value=value, source_type=source_type_id, media_type=media_type_id) + manifest_label = ManifestLabel.create(manifest=manifest_id, label=label, + repository=manifest.repository) + + # If there exists a mapping to a TagManifest, add the old-style label. + # TODO(jschorr): Remove this code once the TagManifest table is gone. 
+ try: + mapping_row = TagManifestToManifest.get(manifest=manifest) + tag_manifest_label = TagManifestLabel.create(annotated=mapping_row.tag_manifest, label=label, + repository=manifest.repository) + TagManifestLabelMap.create(manifest_label=manifest_label, + tag_manifest_label=tag_manifest_label, + label=label, + manifest=manifest, + tag_manifest=mapping_row.tag_manifest) + except TagManifestToManifest.DoesNotExist: + pass + + return label + + +def delete_manifest_label(label_uuid, manifest): + """ Deletes the manifest label on the tag manifest with the given ID. Returns the label deleted + or None if none. + """ + # Find the label itself. + label = get_manifest_label(label_uuid, manifest) + if label is None: + return None + + if not label.source_type.mutable: + raise DataModelException('Cannot delete immutable label') + + # Delete the mapping records and label. + # TODO(jschorr): Remove this code once the TagManifest table is gone. + with db_transaction(): + (TagManifestLabelMap + .delete() + .where(TagManifestLabelMap.label == label) + .execute()) + + deleted_count = TagManifestLabel.delete().where(TagManifestLabel.label == label).execute() + if deleted_count != 1: + logger.warning('More than a single label deleted for matching label %s', label_uuid) + + deleted_count = ManifestLabel.delete().where(ManifestLabel.label == label).execute() + if deleted_count != 1: + logger.warning('More than a single label deleted for matching label %s', label_uuid) + + label.delete_instance(recursive=False) + return label diff --git a/data/model/oci/manifest.py b/data/model/oci/manifest.py new file mode 100644 index 000000000..a677df0ab --- /dev/null +++ b/data/model/oci/manifest.py @@ -0,0 +1,134 @@ +import logging + +from peewee import IntegrityError + +from data.database import Tag, Manifest, ManifestBlob, ManifestLegacyImage, db_transaction +from data.model.oci.tag import filter_to_alive_tags +from data.model.storage import lookup_repo_storages_by_content_checksum +from 
data.model.image import lookup_repository_images, get_image, synthesize_v1_image +from image.docker.schema1 import DockerSchema1Manifest, ManifestException + +logger = logging.getLogger(__name__) + +def lookup_manifest(repository_id, manifest_digest, allow_dead=False): + """ Returns the manifest with the specified digest under the specified repository + or None if none. If allow_dead is True, then manifests referenced by only + dead tags will also be returned. + """ + query = (Manifest + .select() + .where(Manifest.repository == repository_id) + .where(Manifest.digest == manifest_digest)) + + if not allow_dead: + query = filter_to_alive_tags(query.join(Tag)).group_by(Manifest.id) + + try: + return query.get() + except Manifest.DoesNotExist: + return None + + +def get_or_create_manifest(repository_id, manifest_interface_instance): + """ Returns a tuple of the manifest in the specified repository with the matching digest + (if it already exists) or, if not yet created, creates and returns the manifest, as well as + if the manifest was created. Returns (None, None) if there was an error creating the manifest. + Note that *all* blobs referenced by the manifest must exist already in the repository or this + method will fail with a (None, None). + """ + existing = lookup_manifest(repository_id, manifest_interface_instance.digest, allow_dead=True) + if existing is not None: + return existing, False + + assert len(list(manifest_interface_instance.layers)) > 0 + + # TODO(jschorr): Switch this to supporting schema2 once we're ready. + assert isinstance(manifest_interface_instance, DockerSchema1Manifest) + + # Ensure all the blobs in the manifest exist. 
+ digests = manifest_interface_instance.checksums + query = lookup_repo_storages_by_content_checksum(repository_id, digests) + blob_map = {s.content_checksum: s for s in query} + for digest_str in manifest_interface_instance.blob_digests: + if digest_str not in blob_map: + logger.warning('Unknown blob `%s` under manifest `%s` for repository `%s`', digest_str, + manifest_interface_instance.digest, repository_id) + return None, None + + # Determine and populate the legacy image if necessary. + legacy_image_id = _populate_legacy_image(repository_id, manifest_interface_instance, blob_map) + if legacy_image_id is None: + return None, None + + legacy_image = get_image(repository_id, legacy_image_id) + if legacy_image is None: + return None, None + + # Create the manifest and its blobs. + media_type = Manifest.media_type.get_id(manifest_interface_instance.content_type) + storage_ids = {storage.id for storage in blob_map.values()} + + with db_transaction(): + # Create the manifest. + try: + manifest = Manifest.create(repository=repository_id, + digest=manifest_interface_instance.digest, + media_type=media_type, + manifest_bytes=manifest_interface_instance.bytes) + except IntegrityError: + manifest = Manifest.get(repository=repository_id, digest=manifest_interface_instance.digest) + return manifest, False + + # Insert the blobs. + blobs_to_insert = [dict(manifest=manifest, repository=repository_id, + blob=storage_id) for storage_id in storage_ids] + if blobs_to_insert: + ManifestBlob.insert_many(blobs_to_insert).execute() + + # Set the legacy image (if applicable). + ManifestLegacyImage.create(repository=repository_id, image=legacy_image, manifest=manifest) + + return manifest, True + + +def _populate_legacy_image(repository_id, manifest_interface_instance, blob_map): + # Lookup all the images and their parent images (if any) inside the manifest. + # This will let us know which v1 images we need to synthesize and which ones are invalid. 
+ docker_image_ids = list(manifest_interface_instance.legacy_image_ids) + images_query = lookup_repository_images(repository_id, docker_image_ids) + image_storage_map = {i.docker_image_id: i.storage for i in images_query} + + # Rewrite any v1 image IDs that do not match the checksum in the database. + try: + rewritten_images = manifest_interface_instance.rewrite_invalid_image_ids(image_storage_map) + rewritten_images = list(rewritten_images) + parent_image_map = {} + + for rewritten_image in rewritten_images: + if not rewritten_image.image_id in image_storage_map: + parent_image = None + if rewritten_image.parent_image_id: + parent_image = parent_image_map.get(rewritten_image.parent_image_id) + if parent_image is None: + parent_image = get_image(repository_id, rewritten_image.parent_image_id) + if parent_image is None: + return None + + synthesized = synthesize_v1_image( + repository_id, + blob_map[rewritten_image.content_checksum].id, + blob_map[rewritten_image.content_checksum].image_size, + rewritten_image.image_id, + rewritten_image.created, + rewritten_image.comment, + rewritten_image.command, + rewritten_image.compat_json, + parent_image, + ) + + parent_image_map[rewritten_image.image_id] = synthesized + except ManifestException: + logger.exception("exception when rewriting v1 metadata") + return None + + return rewritten_images[-1].image_id diff --git a/data/model/oci/shared.py b/data/model/oci/shared.py new file mode 100644 index 000000000..887eda383 --- /dev/null +++ b/data/model/oci/shared.py @@ -0,0 +1,24 @@ +from data.database import Manifest, ManifestLegacyImage, Image + +def get_legacy_image_for_manifest(manifest_id): + """ Returns the legacy image associated with the given manifest, if any, or None if none. 
""" + try: + query = (ManifestLegacyImage + .select(ManifestLegacyImage, Image) + .join(Image) + .where(ManifestLegacyImage.manifest == manifest_id)) + return query.get().image + except ManifestLegacyImage.DoesNotExist: + return None + + +def get_manifest_for_legacy_image(image_id): + """ Returns a manifest that is associated with the given image, if any, or None if none. """ + try: + query = (ManifestLegacyImage + .select(ManifestLegacyImage, Manifest) + .join(Manifest) + .where(ManifestLegacyImage.image == image_id)) + return query.get().manifest + except ManifestLegacyImage.DoesNotExist: + return None diff --git a/data/model/oci/tag.py b/data/model/oci/tag.py new file mode 100644 index 000000000..fb0439051 --- /dev/null +++ b/data/model/oci/tag.py @@ -0,0 +1,372 @@ +import logging + +from calendar import timegm + +from data.database import (Tag, Manifest, ManifestLegacyImage, Image, ImageStorage, + MediaType, RepositoryTag, TagManifest, TagManifestToManifest, + get_epoch_timestamp_ms, db_transaction) +from data.database import TagToRepositoryTag, RepositoryTag, db_for_update +from data.model.oci.shared import get_legacy_image_for_manifest +from data.model import config +from image.docker.schema1 import (DOCKER_SCHEMA1_CONTENT_TYPES, DockerSchema1Manifest, + MalformedSchema1Manifest) +from util.timedeltastring import convert_to_timedelta + +logger = logging.getLogger(__name__) + + +def get_tag_by_id(tag_id): + """ Returns the tag with the given ID, joined with its manifest or None if none. """ + try: + return Tag.select(Tag, Manifest).join(Manifest).where(Tag.id == tag_id).get() + except Tag.DoesNotExist: + return None + + +def get_tag(repository_id, tag_name): + """ Returns the alive, non-hidden tag with the given name under the specified repository or + None if none. The tag is returned joined with its manifest. 
+ """ + query = (Tag + .select(Tag, Manifest) + .join(Manifest) + .where(Tag.repository == repository_id) + .where(Tag.name == tag_name)) + + query = filter_to_visible_tags(query) + query = filter_to_alive_tags(query) + + try: + return query.get() + except Tag.DoesNotExist: + return None + + +def list_alive_tags(repository_id, start_pagination_id=None, limit=None): + """ Returns a list of all the tags alive in the specified repository, with optional limits. + Tag's returned are joined with their manifest. + """ + query = (Tag + .select(Tag, Manifest) + .join(Manifest) + .where(Tag.repository == repository_id)) + + if start_pagination_id is not None: + query = query.where(Tag.id >= start_pagination_id) + + if limit is not None: + query = query.limit(limit) + + return filter_to_visible_tags(filter_to_alive_tags(query)) + + +def list_repository_tag_history(repository_id, page, page_size, specific_tag_name=None, + active_tags_only=False): + """ Returns a tuple of the full set of tags found in the specified repository, including those + that are no longer alive (unless active_tags_only is True), and whether additional tags exist. + If specific_tag_name is given, the tags are further filtered by name. + """ + query = (Tag + .select(Tag, Manifest) + .join(Manifest) + .where(Tag.repository == repository_id) + .order_by(Tag.lifetime_start_ms.desc(), Tag.name) + .limit(page_size + 1) + .offset(page_size * (page - 1))) + + if specific_tag_name is not None: + query = query.where(Tag.name == specific_tag_name) + + if active_tags_only: + query = filter_to_alive_tags(query) + + query = filter_to_visible_tags(query) + results = list(query) + + return results[0:page_size], len(results) > page_size + + +def get_legacy_images_for_tags(tags): + """ Returns a map from tag ID to the legacy image for the tag. 
""" + if not tags: + return {} + + query = (ManifestLegacyImage + .select(ManifestLegacyImage, Image, ImageStorage) + .join(Image) + .join(ImageStorage) + .where(ManifestLegacyImage.manifest << [tag.manifest_id for tag in tags])) + + by_manifest = {mli.manifest_id: mli.image for mli in query} + return {tag.id: by_manifest[tag.manifest_id] for tag in tags} + + +def find_matching_tag(repository_id, tag_names, tag_kinds=None): + """ Finds an alive tag in the specified repository with one of the specified tag names and + returns it or None if none. Tag's returned are joined with their manifest. + """ + assert repository_id + assert tag_names + + query = (Tag + .select(Tag, Manifest) + .join(Manifest) + .where(Tag.repository == repository_id) + .where(Tag.name << tag_names)) + + if tag_kinds: + query = query.where(Tag.tag_kind << tag_kinds) + + try: + return filter_to_visible_tags(filter_to_alive_tags(query)).get() + except Tag.DoesNotExist: + return None + + +def get_most_recent_tag(repository_id): + """ Returns the most recently pushed alive tag in the specified repository or None if none. + The Tag returned is joined with its manifest. + """ + assert repository_id + + query = (Tag + .select(Tag, Manifest) + .join(Manifest) + .where(Tag.repository == repository_id) + .order_by(Tag.lifetime_start_ms.desc())) + + try: + return filter_to_visible_tags(filter_to_alive_tags(query)).get() + except Tag.DoesNotExist: + return None + + +def get_expired_tag(repository_id, tag_name): + """ Returns a tag with the given name that is expired in the repository or None if none. 
+ """ + try: + return (Tag + .select() + .where(Tag.name == tag_name, Tag.repository == repository_id) + .where(~(Tag.lifetime_end_ms >> None)) + .where(Tag.lifetime_end_ms <= get_epoch_timestamp_ms()) + .get()) + except Tag.DoesNotExist: + return None + + +def retarget_tag(tag_name, manifest_id, is_reversion=False, now_ms=None): + """ Creates or updates a tag with the specified name to point to the given manifest under + its repository. If this action is a reversion to a previous manifest, is_reversion + should be set to True. Returns the newly created tag row or None on error. + """ + try: + manifest = (Manifest + .select(Manifest, MediaType) + .join(MediaType) + .where(Manifest.id == manifest_id) + .get()) + except Manifest.DoesNotExist: + return None + + # CHECK: Make sure that we are not mistargeting a schema 1 manifest to a tag with a different + # name. + if manifest.media_type.name in DOCKER_SCHEMA1_CONTENT_TYPES: + try: + parsed = DockerSchema1Manifest(manifest.manifest_bytes, validate=False) + if parsed.tag != tag_name: + logger.error('Tried to re-target schema1 manifest with tag `%s` to tag `%s', parsed.tag, + tag_name) + return None + except MalformedSchema1Manifest: + logger.exception('Could not parse schema1 manifest') + return None + + legacy_image = get_legacy_image_for_manifest(manifest) + now_ms = now_ms or get_epoch_timestamp_ms() + now_ts = int(now_ms / 1000) + + with db_transaction(): + # Lookup an existing tag in the repository with the same name and, if present, mark it + # as expired. + existing_tag = get_tag(manifest.repository_id, tag_name) + if existing_tag is not None: + _, okay = set_tag_end_ms(existing_tag, now_ms) + + # TODO: should we retry here and/or use a for-update? + if not okay: + return None + + # Create a new tag pointing to the manifest with a lifetime start of now. 
+ created = Tag.create(name=tag_name, repository=manifest.repository_id, lifetime_start_ms=now_ms, + reversion=is_reversion, manifest=manifest, + tag_kind=Tag.tag_kind.get_id('tag')) + + # TODO(jschorr): Remove the linkage code once RepositoryTag is gone. + # If this is a schema 1 manifest, then add a TagManifest linkage to it. Otherwise, it will only + # be pullable via the new OCI model. + if manifest.media_type.name in DOCKER_SCHEMA1_CONTENT_TYPES and legacy_image is not None: + old_style_tag = RepositoryTag.create(repository=manifest.repository_id, image=legacy_image, + name=tag_name, lifetime_start_ts=now_ts, + reversion=is_reversion) + TagToRepositoryTag.create(tag=created, repository_tag=old_style_tag, + repository=manifest.repository_id) + + tag_manifest = TagManifest.create(tag=old_style_tag, digest=manifest.digest, + json_data=manifest.manifest_bytes) + TagManifestToManifest.create(tag_manifest=tag_manifest, manifest=manifest, + repository=manifest.repository_id) + + return created + + +def delete_tag(repository_id, tag_name): + """ Deletes the alive tag with the given name in the specified repository and returns the deleted + tag. If the tag did not exist, returns None. + """ + tag = get_tag(repository_id, tag_name) + if tag is None: + return None + + return _delete_tag(tag, get_epoch_timestamp_ms()) + + +def _delete_tag(tag, now_ms): + """ Deletes the given tag by marking it as expired. """ + now_ts = int(now_ms / 1000) + + with db_transaction(): + updated = (Tag + .update(lifetime_end_ms=now_ms) + .where(Tag.id == tag.id, Tag.lifetime_end_ms == tag.lifetime_end_ms) + .execute()) + if updated != 1: + return None + + # TODO(jschorr): Remove the linkage code once RepositoryTag is gone. 
+ try: + old_style_tag = (TagToRepositoryTag + .select(TagToRepositoryTag, RepositoryTag) + .join(RepositoryTag) + .where(TagToRepositoryTag.tag == tag) + .get()).repository_tag + + old_style_tag.lifetime_end_ts = now_ts + old_style_tag.save() + except TagToRepositoryTag.DoesNotExist: + pass + + return tag + + +def delete_tags_for_manifest(manifest): + """ Deletes all tags pointing to the given manifest. Returns the list of tags + deleted. + """ + tags = list(Tag.select().where(Tag.manifest == manifest)) + now_ms = get_epoch_timestamp_ms() + + with db_transaction(): + for tag in tags: + _delete_tag(tag, now_ms) + + return tags + + +def filter_to_visible_tags(query): + """ Adjusts the specified Tag query to only return those tags that are visible. + """ + return query.where(Tag.hidden == False) + + +def filter_to_alive_tags(query, now_ms=None): + """ Adjusts the specified Tag query to only return those tags alive. If now_ms is specified, + the given timestamp (in MS) is used in place of the current timestamp for determining wherther + a tag is alive. + """ + if now_ms is None: + now_ms = get_epoch_timestamp_ms() + + return query.where((Tag.lifetime_end_ms >> None) | (Tag.lifetime_end_ms > now_ms)) + + +def set_tag_expiration_sec_for_manifest(manifest_id, expiration_seconds): + """ Sets the tag expiration for any tags that point to the given manifest ID. """ + query = Tag.select().where(Tag.manifest == manifest_id) + query = filter_to_alive_tags(query) + query = filter_to_visible_tags(query) + tags = list(query) + for tag in tags: + set_tag_end_ms(tag, tag.lifetime_start_ms + (expiration_seconds * 1000)) + + return tags + + +def set_tag_expiration_for_manifest(manifest_id, expiration_datetime): + """ Sets the tag expiration for any tags that point to the given manifest ID. 
""" + query = Tag.select().where(Tag.manifest == manifest_id) + query = filter_to_alive_tags(query) + query = filter_to_visible_tags(query) + tags = list(query) + for tag in tags: + change_tag_expiration(tag, expiration_datetime) + + return tags + + +def change_tag_expiration(tag_id, expiration_datetime): + """ Changes the expiration of the specified tag to the given expiration datetime. If + the expiration datetime is None, then the tag is marked as not expiring. Returns + a tuple of the previous expiration timestamp in seconds (if any), and whether the + operation succeeded. + """ + try: + tag = Tag.get(id=tag_id) + except Tag.DoesNotExist: + return (None, False) + + new_end_ms = None + min_expire_sec = convert_to_timedelta(config.app_config.get('LABELED_EXPIRATION_MINIMUM', '1h')) + max_expire_sec = convert_to_timedelta(config.app_config.get('LABELED_EXPIRATION_MAXIMUM', '104w')) + + if expiration_datetime is not None: + lifetime_start_ts = int(tag.lifetime_start_ms / 1000) + + offset = timegm(expiration_datetime.utctimetuple()) - lifetime_start_ts + offset = min(max(offset, min_expire_sec.total_seconds()), max_expire_sec.total_seconds()) + new_end_ms = tag.lifetime_start_ms + (offset * 1000) + + if new_end_ms == tag.lifetime_end_ms: + return (None, True) + + return set_tag_end_ms(tag, new_end_ms) + + +def set_tag_end_ms(tag, end_ms): + """ Sets the end timestamp for a tag. Should only be called by change_tag_expiration + or tests. + """ + + with db_transaction(): + updated = (Tag + .update(lifetime_end_ms=end_ms) + .where(Tag.id == tag) + .where(Tag.lifetime_end_ms == tag.lifetime_end_ms) + .execute()) + if updated != 1: + return (None, False) + + # TODO(jschorr): Remove the linkage code once RepositoryTag is gone. 
+ try: + old_style_tag = (TagToRepositoryTag + .select(TagToRepositoryTag, RepositoryTag) + .join(RepositoryTag) + .where(TagToRepositoryTag.tag == tag) + .get()).repository_tag + + old_style_tag.lifetime_end_ts = end_ms / 1000 + old_style_tag.save() + except TagToRepositoryTag.DoesNotExist: + pass + + return (tag.lifetime_end_ms, True) diff --git a/data/model/oci/test/test_oci_label.py b/data/model/oci/test/test_oci_label.py new file mode 100644 index 000000000..2ba04521b --- /dev/null +++ b/data/model/oci/test/test_oci_label.py @@ -0,0 +1,87 @@ +import pytest + +from playhouse.test_utils import assert_query_count + +from data.database import Manifest, ManifestLabel +from data.model.oci.label import (create_manifest_label, list_manifest_labels, get_manifest_label, + delete_manifest_label, DataModelException) + +from test.fixtures import * + + +@pytest.mark.parametrize('key, value, source_type, expected_error', [ + ('foo', 'bar', 'manifest', None), + + pytest.param('..foo', 'bar', 'manifest', None, id='invalid key on manifest'), + pytest.param('..foo', 'bar', 'api', 'is invalid', id='invalid key on api'), +]) +def test_create_manifest_label(key, value, source_type, expected_error, initialized_db): + manifest = Manifest.get() + + if expected_error: + with pytest.raises(DataModelException) as ex: + create_manifest_label(manifest, key, value, source_type) + + assert ex.match(expected_error) + return + + label = create_manifest_label(manifest, key, value, source_type) + labels = [ml.label_id for ml in ManifestLabel.select().where(ManifestLabel.manifest == manifest)] + assert label.id in labels + + with assert_query_count(1): + assert label in list_manifest_labels(manifest) + + assert label not in list_manifest_labels(manifest, 'someprefix') + assert label in list_manifest_labels(manifest, key[0:2]) + + with assert_query_count(1): + assert get_manifest_label(label.uuid, manifest) == label + + +def test_list_manifest_labels(initialized_db): + manifest = Manifest.get() + 
+ label1 = create_manifest_label(manifest, 'foo', '1', 'manifest') + label2 = create_manifest_label(manifest, 'bar', '2', 'api') + label3 = create_manifest_label(manifest, 'baz', '3', 'internal') + + assert label1 in list_manifest_labels(manifest) + assert label2 in list_manifest_labels(manifest) + assert label3 in list_manifest_labels(manifest) + + other_manifest = Manifest.select().where(Manifest.id != manifest.id).get() + assert label1 not in list_manifest_labels(other_manifest) + assert label2 not in list_manifest_labels(other_manifest) + assert label3 not in list_manifest_labels(other_manifest) + + +def test_get_manifest_label(initialized_db): + found = False + for manifest_label in ManifestLabel.select(): + assert (get_manifest_label(manifest_label.label.uuid, manifest_label.manifest) == + manifest_label.label) + assert manifest_label.label in list_manifest_labels(manifest_label.manifest) + found = True + + assert found + + +def test_delete_manifest_label(initialized_db): + found = False + for manifest_label in list(ManifestLabel.select()): + assert (get_manifest_label(manifest_label.label.uuid, manifest_label.manifest) == + manifest_label.label) + assert manifest_label.label in list_manifest_labels(manifest_label.manifest) + + if manifest_label.label.source_type.mutable: + assert delete_manifest_label(manifest_label.label.uuid, manifest_label.manifest) + assert manifest_label.label not in list_manifest_labels(manifest_label.manifest) + assert get_manifest_label(manifest_label.label.uuid, manifest_label.manifest) is None + else: + with pytest.raises(DataModelException): + delete_manifest_label(manifest_label.label.uuid, manifest_label.manifest) + + found = True + + assert found diff --git a/data/model/oci/test/test_oci_manifest.py b/data/model/oci/test/test_oci_manifest.py new file mode 100644 index 000000000..70c758c88 --- /dev/null +++ b/data/model/oci/test/test_oci_manifest.py @@ -0,0 +1,83 @@ +from playhouse.test_utils import assert_query_count + +from 
app import docker_v2_signing_key + +from data.database import Tag, ManifestBlob, get_epoch_timestamp_ms +from data.model.oci.manifest import lookup_manifest, get_or_create_manifest +from data.model.oci.tag import filter_to_alive_tags, get_tag +from data.model.oci.shared import get_legacy_image_for_manifest +from data.model.repository import get_repository +from image.docker.schema1 import DockerSchema1ManifestBuilder, DockerSchema1Manifest + +from test.fixtures import * + +def test_lookup_manifest(initialized_db): + found = False + for tag in filter_to_alive_tags(Tag.select()): + found = True + repo = tag.repository + digest = tag.manifest.digest + with assert_query_count(1): + assert lookup_manifest(repo, digest) == tag.manifest + + assert found + + for tag in Tag.select(): + repo = tag.repository + digest = tag.manifest.digest + with assert_query_count(1): + assert lookup_manifest(repo, digest, allow_dead=True) == tag.manifest + + +def test_lookup_manifest_dead_tag(initialized_db): + dead_tag = Tag.select().where(Tag.lifetime_end_ms <= get_epoch_timestamp_ms()).get() + assert dead_tag.lifetime_end_ms <= get_epoch_timestamp_ms() + + assert lookup_manifest(dead_tag.repository, dead_tag.manifest.digest) is None + assert (lookup_manifest(dead_tag.repository, dead_tag.manifest.digest, allow_dead=True) == + dead_tag.manifest) + + +def test_get_or_create_manifest(initialized_db): + repository = get_repository('devtable', 'simple') + + latest_tag = get_tag(repository, 'latest') + legacy_image = get_legacy_image_for_manifest(latest_tag.manifest) + parsed = DockerSchema1Manifest(latest_tag.manifest.manifest_bytes, validate=False) + + builder = DockerSchema1ManifestBuilder('devtable', 'simple', 'anothertag') + builder.add_layer(parsed.blob_digests[0], '{"id": "%s"}' % legacy_image.docker_image_id) + sample_manifest_instance = builder.build(docker_v2_signing_key) + + # Create a new manifest. 
+ created, newly_created = get_or_create_manifest(repository, sample_manifest_instance) + assert newly_created + assert created is not None + assert created.digest == sample_manifest_instance.digest + assert created.manifest_bytes == sample_manifest_instance.bytes + + assert get_legacy_image_for_manifest(created) is not None + + blob_digests = [mb.blob.content_checksum for mb + in ManifestBlob.select().where(ManifestBlob.manifest == created)] + assert parsed.blob_digests[0] in blob_digests + + # Retrieve it again and ensure it is the same manifest. + created2, newly_created2 = get_or_create_manifest(repository, sample_manifest_instance) + assert not newly_created2 + assert created2 == created + + +def test_get_or_create_manifest_invalid_image(initialized_db): + repository = get_repository('devtable', 'simple') + + latest_tag = get_tag(repository, 'latest') + parsed = DockerSchema1Manifest(latest_tag.manifest.manifest_bytes, validate=False) + + builder = DockerSchema1ManifestBuilder('devtable', 'simple', 'anothertag') + builder.add_layer(parsed.blob_digests[0], '{"id": "foo", "parent": "someinvalidimageid"}') + sample_manifest_instance = builder.build(docker_v2_signing_key) + + created, newly_created = get_or_create_manifest(repository, sample_manifest_instance) + assert created is None + assert newly_created is None diff --git a/data/model/oci/test/test_oci_tag.py b/data/model/oci/test/test_oci_tag.py new file mode 100644 index 000000000..624131be5 --- /dev/null +++ b/data/model/oci/test/test_oci_tag.py @@ -0,0 +1,253 @@ +from calendar import timegm +from datetime import timedelta, datetime + +from playhouse.test_utils import assert_query_count + +from data.database import (Tag, ManifestLegacyImage, TagToRepositoryTag, TagManifestToManifest, + TagManifest, Manifest) +from data.model.oci.tag import (find_matching_tag, get_most_recent_tag, list_alive_tags, + get_legacy_images_for_tags, filter_to_alive_tags, + filter_to_visible_tags, list_repository_tag_history, + 
get_expired_tag, get_tag, delete_tag, + delete_tags_for_manifest, change_tag_expiration, + set_tag_expiration_for_manifest, retarget_tag) +from data.model.repository import get_repository, create_repository + +from test.fixtures import * + +@pytest.mark.parametrize('namespace_name, repo_name, tag_names, expected', [ + ('devtable', 'simple', ['latest'], 'latest'), + ('devtable', 'simple', ['unknown', 'latest'], 'latest'), + ('devtable', 'simple', ['unknown'], None), +]) +def test_find_matching_tag(namespace_name, repo_name, tag_names, expected, initialized_db): + repo = get_repository(namespace_name, repo_name) + if expected is not None: + with assert_query_count(1): + found = find_matching_tag(repo, tag_names) + + assert found is not None + assert found.name == expected + assert not found.lifetime_end_ms + else: + with assert_query_count(1): + assert find_matching_tag(repo, tag_names) is None + + +def test_get_most_recent_tag(initialized_db): + repo = get_repository('outsideorg', 'coolrepo') + + with assert_query_count(1): + assert get_most_recent_tag(repo).name == 'latest' + + +def test_get_most_recent_tag_empty_repo(initialized_db): + empty_repo = create_repository('devtable', 'empty', None) + + with assert_query_count(1): + assert get_most_recent_tag(empty_repo) is None + + +def test_list_alive_tags(initialized_db): + found = False + for tag in filter_to_visible_tags(filter_to_alive_tags(Tag.select())): + tags = list_alive_tags(tag.repository) + assert tag in tags + + with assert_query_count(1): + legacy_images = get_legacy_images_for_tags(tags) + + for tag in tags: + assert ManifestLegacyImage.get(manifest=tag.manifest).image == legacy_images[tag.id] + + found = True + + assert found + + # Ensure hidden tags cannot be listed. 
+ tag = Tag.get() + tag.hidden = True + tag.save() + + tags = list_alive_tags(tag.repository) + assert tag not in tags + + +def test_get_tag(initialized_db): + found = False + for tag in filter_to_visible_tags(filter_to_alive_tags(Tag.select())): + repo = tag.repository + + with assert_query_count(1): + assert get_tag(repo, tag.name) == tag + found = True + + assert found + + +@pytest.mark.parametrize('namespace_name, repo_name', [ + ('devtable', 'simple'), + ('devtable', 'complex'), +]) +def test_list_repository_tag_history(namespace_name, repo_name, initialized_db): + repo = get_repository(namespace_name, repo_name) + + with assert_query_count(1): + results, has_more = list_repository_tag_history(repo, 1, 100) + + assert results + assert not has_more + + +def test_list_repository_tag_history_with_history(initialized_db): + repo = get_repository('devtable', 'history') + + with assert_query_count(1): + results, _ = list_repository_tag_history(repo, 1, 100) + + assert len(results) == 2 + assert results[0].lifetime_end_ms is None + assert results[1].lifetime_end_ms is not None + + with assert_query_count(1): + results, _ = list_repository_tag_history(repo, 1, 100, specific_tag_name='latest') + + assert len(results) == 2 + assert results[0].lifetime_end_ms is None + assert results[1].lifetime_end_ms is not None + + with assert_query_count(1): + results, _ = list_repository_tag_history(repo, 1, 100, specific_tag_name='foobar') + + assert len(results) == 0 + + +def test_list_repository_tag_history_all_tags(initialized_db): + for tag in Tag.select(): + repo = tag.repository + with assert_query_count(1): + results, _ = list_repository_tag_history(repo, 1, 1000) + + assert (tag in results) == (not tag.hidden) + + +@pytest.mark.parametrize('namespace_name, repo_name, tag_name, expected', [ + ('devtable', 'simple', 'latest', False), + ('devtable', 'simple', 'unknown', False), + ('devtable', 'complex', 'latest', False), + + ('devtable', 'history', 'latest', True), +]) +def 
test_get_expired_tag(namespace_name, repo_name, tag_name, expected, initialized_db): + repo = get_repository(namespace_name, repo_name) + + with assert_query_count(1): + assert bool(get_expired_tag(repo, tag_name)) == expected + + +def test_delete_tag(initialized_db): + found = False + for tag in list(filter_to_visible_tags(filter_to_alive_tags(Tag.select()))): + repo = tag.repository + + assert get_tag(repo, tag.name) == tag + assert tag.lifetime_end_ms is None + + with assert_query_count(4): + assert delete_tag(repo, tag.name) == tag + + assert get_tag(repo, tag.name) is None + found = True + + assert found + + +def test_delete_tags_for_manifest(initialized_db): + for tag in list(filter_to_visible_tags(filter_to_alive_tags(Tag.select()))): + repo = tag.repository + assert get_tag(repo, tag.name) == tag + + with assert_query_count(5): + assert delete_tags_for_manifest(tag.manifest) == [tag] + + assert get_tag(repo, tag.name) is None + + +@pytest.mark.parametrize('timedelta, expected_timedelta', [ + pytest.param(timedelta(seconds=1), timedelta(hours=1), id='less than minimum'), + pytest.param(timedelta(weeks=300), timedelta(weeks=104), id='more than maxium'), + pytest.param(timedelta(weeks=1), timedelta(weeks=1), id='within range'), +]) +def test_change_tag_expiration(timedelta, expected_timedelta, initialized_db): + now = datetime.utcnow() + now_ms = timegm(now.utctimetuple()) * 1000 + + tag = Tag.get() + tag.lifetime_start_ms = now_ms + tag.save() + + original_end_ms, okay = change_tag_expiration(tag, datetime.utcnow() + timedelta) + assert okay + assert original_end_ms == tag.lifetime_end_ms + + updated_tag = Tag.get(id=tag.id) + offset = expected_timedelta.total_seconds() * 1000 + expected_ms = (updated_tag.lifetime_start_ms + offset) + assert updated_tag.lifetime_end_ms == expected_ms + + +def test_set_tag_expiration_for_manifest(initialized_db): + tag = Tag.get() + manifest = tag.manifest + assert manifest is not None + + 
set_tag_expiration_for_manifest(manifest, datetime.utcnow() + timedelta(weeks=1)) + + updated_tag = Tag.get(id=tag.id) + assert updated_tag.lifetime_end_ms is not None + + +def test_retarget_tag(initialized_db): + repo = get_repository('devtable', 'history') + results, _ = list_repository_tag_history(repo, 1, 100, specific_tag_name='latest') + + assert len(results) == 2 + assert results[0].lifetime_end_ms is None + assert results[1].lifetime_end_ms is not None + + # Revert back to the original manifest. + created = retarget_tag('latest', results[0].manifest, is_reversion=True, + now_ms=results[1].lifetime_end_ms + 10000) + assert created.lifetime_end_ms is None + assert created.reversion + assert created.name == 'latest' + assert created.manifest == results[0].manifest + + # Verify in the history. + results, _ = list_repository_tag_history(repo, 1, 100, specific_tag_name='latest') + + assert len(results) == 3 + assert results[0].lifetime_end_ms is None + assert results[1].lifetime_end_ms is not None + assert results[2].lifetime_end_ms is not None + + assert results[0] == created + + # Verify old-style tables. 
+ repository_tag = TagToRepositoryTag.get(tag=created).repository_tag + assert repository_tag.lifetime_start_ts == int(created.lifetime_start_ms / 1000) + + tag_manifest = TagManifest.get(tag=repository_tag) + assert TagManifestToManifest.get(tag_manifest=tag_manifest).manifest == created.manifest + + +def test_retarget_tag_wrong_name(initialized_db): + repo = get_repository('devtable', 'history') + results, _ = list_repository_tag_history(repo, 1, 100, specific_tag_name='latest') + assert len(results) == 2 + + created = retarget_tag('someothername', results[1].manifest, is_reversion=True) + assert created is None + + results, _ = list_repository_tag_history(repo, 1, 100, specific_tag_name='latest') + assert len(results) == 2 diff --git a/data/model/tag.py b/data/model/tag.py index b8148c6d8..0fa9513cc 100644 --- a/data/model/tag.py +++ b/data/model/tag.py @@ -251,17 +251,18 @@ def list_repository_tags(namespace_name, repository_name, include_hidden=False, def create_or_update_tag(namespace_name, repository_name, tag_name, tag_docker_image_id, - reversion=False): + reversion=False, now_ms=None): try: repo = _basequery.get_existing_repository(namespace_name, repository_name) except Repository.DoesNotExist: raise DataModelException('Invalid repository %s/%s' % (namespace_name, repository_name)) - return create_or_update_tag_for_repo(repo.id, tag_name, tag_docker_image_id, reversion=reversion) + return create_or_update_tag_for_repo(repo.id, tag_name, tag_docker_image_id, reversion=reversion, + now_ms=now_ms) def create_or_update_tag_for_repo(repository_id, tag_name, tag_docker_image_id, reversion=False, - oci_manifest=None): - now_ms = get_epoch_timestamp_ms() + oci_manifest=None, now_ms=None): + now_ms = now_ms or get_epoch_timestamp_ms() now_ts = int(now_ms / 1000) with db_transaction(): diff --git a/data/registry_model/__init__.py b/data/registry_model/__init__.py index 484bb7e41..561a848e5 100644 --- a/data/registry_model/__init__.py +++ 
b/data/registry_model/__init__.py @@ -1,3 +1,10 @@ -from data.registry_model.registry_pre_oci_model import pre_oci_model +import os +import logging -registry_model = pre_oci_model +from data.registry_model.registry_pre_oci_model import pre_oci_model +from data.registry_model.registry_oci_model import oci_model + +logger = logging.getLogger(__name__) + +registry_model = oci_model if os.getenv('OCI_DATA_MODEL') == 'true' else pre_oci_model +logger.debug('Using registry model `%s`', registry_model) diff --git a/data/registry_model/datatypes.py b/data/registry_model/datatypes.py index 93c81a448..798e4df3e 100644 --- a/data/registry_model/datatypes.py +++ b/data/registry_model/datatypes.py @@ -95,8 +95,25 @@ class Label(datatype('Label', ['key', 'value', 'uuid', 'source_type_name', 'medi class Tag(datatype('Tag', ['name', 'reversion', 'manifest_digest', 'lifetime_start_ts', - 'lifetime_end_ts'])): + 'lifetime_end_ts', 'lifetime_start_ms', 'lifetime_end_ms'])): """ Tag represents a tag in a repository, which points to a manifest or image. 
""" + @classmethod + def for_tag(cls, tag, legacy_image=None): + if tag is None: + return None + + return Tag(db_id=tag.id, + name=tag.name, + reversion=tag.reversion, + lifetime_start_ms=tag.lifetime_start_ms, + lifetime_end_ms=tag.lifetime_end_ms, + lifetime_start_ts=tag.lifetime_start_ms / 1000, + lifetime_end_ts=tag.lifetime_end_ms / 1000 if tag.lifetime_end_ms else None, + manifest_digest=tag.manifest.digest, + inputs=dict(legacy_image=legacy_image, + manifest=tag.manifest, + repository=RepositoryReference.for_id(tag.repository_id))) + @classmethod def for_repository_tag(cls, repository_tag, manifest_digest=None, legacy_image=None): if repository_tag is None: @@ -107,10 +124,19 @@ class Tag(datatype('Tag', ['name', 'reversion', 'manifest_digest', 'lifetime_sta reversion=repository_tag.reversion, lifetime_start_ts=repository_tag.lifetime_start_ts, lifetime_end_ts=repository_tag.lifetime_end_ts, + lifetime_start_ms=repository_tag.lifetime_start_ts * 1000, + lifetime_end_ms=(repository_tag.lifetime_end_ts * 1000 + if repository_tag.lifetime_end_ts else None), manifest_digest=manifest_digest, inputs=dict(legacy_image=legacy_image, repository=RepositoryReference.for_id(repository_tag.repository_id))) + @property + @requiresinput('manifest') + def _manifest(self, manifest): + """ Returns the manifest for this tag. Will only apply to new-style OCI tags. """ + return manifest + @property @requiresinput('repository') def repository(self, repository): @@ -144,6 +170,17 @@ class Manifest(datatype('Manifest', ['digest', 'media_type', 'manifest_bytes'])) media_type=DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE, # Always in legacy. 
inputs=dict(legacy_image=legacy_image)) + @classmethod + def for_manifest(cls, manifest, legacy_image): + if manifest is None: + return None + + return Manifest(db_id=manifest.id, + digest=manifest.digest, + manifest_bytes=manifest.manifest_bytes, + media_type=manifest.media_type.name, + inputs=dict(legacy_image=legacy_image)) + @property @requiresinput('legacy_image') def legacy_image(self, legacy_image): @@ -179,6 +216,11 @@ class LegacyImage(datatype('LegacyImage', ['docker_image_id', 'created', 'commen aggregate_size=image.aggregate_size, uploading=image.storage.uploading) + @property + def id(self): + """ Returns the database ID of the legacy image. """ + return self._db_id + @property @requiresinput('images_map') @requiresinput('ancestor_id_list') diff --git a/data/registry_model/registry_oci_model.py b/data/registry_model/registry_oci_model.py new file mode 100644 index 000000000..455cff48f --- /dev/null +++ b/data/registry_model/registry_oci_model.py @@ -0,0 +1,413 @@ +# pylint: disable=protected-access +import logging + +from contextlib import contextmanager + +from data import database +from data import model +from data.model import oci, DataModelException +from data.database import db_transaction, Image +from data.registry_model.interface import RegistryDataInterface +from data.registry_model.datatypes import Tag, Manifest, LegacyImage, Label, SecurityScanStatus +from data.registry_model.shared import SharedModel +from data.registry_model.label_handlers import apply_label_to_manifest +from util.validation import is_json + + +logger = logging.getLogger(__name__) + + +class OCIModel(SharedModel, RegistryDataInterface): + """ + OCIModel implements the data model for the registry API using a database schema + after it was changed to support the OCI specification. + """ + + def find_matching_tag(self, repository_ref, tag_names): + """ Finds an alive tag in the repository matching one of the given tag names and returns it + or None if none. 
+ """ + found_tag = oci.tag.find_matching_tag(repository_ref._db_id, tag_names) + assert found_tag is None or not found_tag.hidden + return Tag.for_tag(found_tag) + + def get_most_recent_tag(self, repository_ref): + """ Returns the most recently pushed alive tag in the repository, if any. If none, returns + None. + """ + found_tag = oci.tag.get_most_recent_tag(repository_ref._db_id) + assert found_tag is None or not found_tag.hidden + return Tag.for_tag(found_tag) + + def get_manifest_for_tag(self, tag, backfill_if_necessary=False): + """ Returns the manifest associated with the given tag. """ + legacy_image = oci.shared.get_legacy_image_for_manifest(tag._manifest) + return Manifest.for_manifest(tag._manifest, LegacyImage.for_image(legacy_image)) + + def lookup_manifest_by_digest(self, repository_ref, manifest_digest, allow_dead=False, + include_legacy_image=False): + """ Looks up the manifest with the given digest under the given repository and returns it + or None if none. """ + manifest = oci.manifest.lookup_manifest(repository_ref._db_id, manifest_digest, + allow_dead=allow_dead) + if manifest is None: + return None + + legacy_image = None + if include_legacy_image: + try: + legacy_image_id = database.ManifestLegacyImage.get(manifest=manifest).image.docker_image_id + legacy_image = self.get_legacy_image(repository_ref, legacy_image_id, include_parents=True) + except database.ManifestLegacyImage.DoesNotExist: + return None + + return Manifest.for_manifest(manifest, legacy_image) + + def create_manifest_label(self, manifest, key, value, source_type_name, media_type_name=None): + """ Creates a label on the manifest with the given key and value. """ + label_data = dict(key=key, value=value, source_type_name=source_type_name, + media_type_name=media_type_name) + + with db_transaction(): + # Create the label itself. 
+ label = oci.label.create_manifest_label(manifest._db_id, key, value, source_type_name, + media_type_name) + if label is None: + return None + + # Apply any changes to the manifest that the label prescribes. + apply_label_to_manifest(label_data, manifest, self) + + return Label.for_label(label) + + @contextmanager + def batch_create_manifest_labels(self, manifest): + """ Returns a context manager for batch creation of labels on a manifest. + + Can raise InvalidLabelKeyException or InvalidMediaTypeException depending + on the validation errors. + """ + labels_to_add = [] + def add_label(key, value, source_type_name, media_type_name=None): + labels_to_add.append(dict(key=key, value=value, source_type_name=source_type_name, + media_type_name=media_type_name)) + + yield add_label + + # TODO: make this truly batch once we've fully transitioned to V2_2 and no longer need + # the mapping tables. + for label_data in labels_to_add: + with db_transaction(): + # Create the label itself. + oci.label.create_manifest_label(manifest._db_id, **label_data) + + # Apply any changes to the manifest that the label prescribes. + apply_label_to_manifest(label_data, manifest, self) + + def list_manifest_labels(self, manifest, key_prefix=None): + """ Returns all labels found on the manifest. If specified, the key_prefix will filter the + labels returned to those keys that start with the given prefix. + """ + labels = oci.label.list_manifest_labels(manifest._db_id, prefix_filter=key_prefix) + return [Label.for_label(l) for l in labels] + + def get_manifest_label(self, manifest, label_uuid): + """ Returns the label with the specified UUID on the manifest or None if none. """ + return Label.for_label(oci.label.get_manifest_label(label_uuid, manifest._db_id)) + + def delete_manifest_label(self, manifest, label_uuid): + """ Delete the label with the specified UUID on the manifest. Returns the label deleted + or None if none. 
+ """ + return Label.for_label(oci.label.delete_manifest_label(label_uuid, manifest._db_id)) + + def list_repository_tags(self, repository_ref, include_legacy_images=False, + start_pagination_id=None, + limit=None): + """ + Returns a list of all the active tags in the repository. Note that this can be a *heavy* + operation on repositories with a lot of tags, and should be avoided for more targetted + operations wherever possible. + """ + tags = list(oci.tag.list_alive_tags(repository_ref._db_id, start_pagination_id, limit)) + legacy_images_map = {} + if include_legacy_images: + legacy_images_map = oci.tag.get_legacy_images_for_tags(tags) + + return [Tag.for_tag(tag, legacy_image=LegacyImage.for_image(legacy_images_map.get(tag.id))) + for tag in tags] + + def list_repository_tag_history(self, repository_ref, page=1, size=100, specific_tag_name=None, + active_tags_only=False): + """ + Returns the history of all tags in the repository (unless filtered). This includes tags that + have been made in-active due to newer versions of those tags coming into service. + """ + tags, has_more = oci.tag.list_repository_tag_history(repository_ref._db_id, + page, size, + specific_tag_name, + active_tags_only) + + # TODO: do we need legacy images here? + legacy_images_map = oci.tag.get_legacy_images_for_tags(tags) + return [Tag.for_tag(tag, LegacyImage.for_image(legacy_images_map.get(tag.id))) for tag in tags], has_more + + def has_expired_tag(self, repository_ref, tag_name): + """ + Returns true if and only if the repository contains a tag with the given name that is expired. + """ + return bool(oci.tag.get_expired_tag(repository_ref._db_id, tag_name)) + + def get_repo_tag(self, repository_ref, tag_name, include_legacy_image=False): + """ + Returns the latest, *active* tag found in the repository, with the matching name + or None if none. 
+ """ + assert isinstance(tag_name, basestring) + + tag = oci.tag.get_tag(repository_ref._db_id, tag_name) + if tag is None: + return None + + legacy_image = None + if include_legacy_image: + legacy_images = oci.tag.get_legacy_images_for_tags([tag]) + legacy_image = legacy_images.get(tag.id) + + return Tag.for_tag(tag, legacy_image=LegacyImage.for_image(legacy_image)) + + def create_manifest_and_retarget_tag(self, repository_ref, manifest_interface_instance, tag_name): + """ Creates a manifest in a repository, adding all of the necessary data in the model. + + The `manifest_interface_instance` parameter must be an instance of the manifest + interface as returned by the image/docker package. + + Note that all blobs referenced by the manifest must exist under the repository or this + method will fail and return None. + + Returns a reference to the (created manifest, tag) or (None, None) on error. + """ + # Get or create the manifest itself. + manifest, newly_created = oci.manifest.get_or_create_manifest(repository_ref._db_id, + manifest_interface_instance) + if manifest is None: + return (None, None) + + # Re-target the tag to it. + tag = oci.tag.retarget_tag(tag_name, manifest) + if tag is None: + return (None, None) + + legacy_image = oci.shared.get_legacy_image_for_manifest(manifest) + if legacy_image is None: + return (None, None) + + # Save the labels on the manifest. Note that order is important here: This must come after the + # tag has been changed. + # TODO(jschorr): Support schema2 here when we're ready. + if newly_created: + has_labels = False + + with self.batch_create_manifest_labels(Manifest.for_manifest(manifest, None)) as add_label: + for key, value in manifest_interface_instance.layers[-1].v1_metadata.labels.iteritems(): + media_type = 'application/json' if is_json(value) else 'text/plain' + add_label(key, value, 'manifest', media_type) + has_labels = True + + # Reload the tag in case any updates were applied. 
+ if has_labels: + tag = database.Tag.get(id=tag.id) + + li = LegacyImage.for_image(legacy_image) + return (Manifest.for_manifest(manifest, li), Tag.for_tag(tag, li)) + + def retarget_tag(self, repository_ref, tag_name, manifest_or_legacy_image, + is_reversion=False): + """ + Creates, updates or moves a tag to a new entry in history, pointing to the manifest or + legacy image specified. If is_reversion is set to True, this operation is considered a + reversion over a previous tag move operation. Returns the updated Tag or None on error. + """ + manifest_id = manifest_or_legacy_image._db_id + if isinstance(manifest_or_legacy_image, LegacyImage): + # If a legacy image was required, build a new manifest for it and move the tag to that. + try: + image_row = database.Image.get(id=manifest_or_legacy_image._db_id) + except database.Image.DoesNotExist: + return None + + manifest_instance = self._build_manifest_for_legacy_image(tag_name, image_row) + if manifest_instance is None: + return None + + manifest, _ = oci.manifest.get_or_create_manifest(repository_ref._db_id, manifest_instance) + if manifest is None: + return None + + manifest_id = manifest.id + + tag = oci.tag.retarget_tag(tag_name, manifest_id, is_reversion=is_reversion) + legacy_image = LegacyImage.for_image(oci.shared.get_legacy_image_for_manifest(manifest_id)) + return Tag.for_tag(tag, legacy_image) + + def delete_tag(self, repository_ref, tag_name): + """ + Deletes the latest, *active* tag with the given name in the repository. + """ + deleted_tag = oci.tag.delete_tag(repository_ref._db_id, tag_name) + if deleted_tag is None: + # TODO(jschorr): This is only needed because preoci raises an exception. Remove and fix + # expected status codes once PreOCIModel is gone. 
+ msg = ('Invalid repository tag \'%s\' on repository' % tag_name) + raise DataModelException(msg) + + return Tag.for_tag(deleted_tag) + + def delete_tags_for_manifest(self, manifest): + """ + Deletes all tags pointing to the given manifest, making the manifest inaccessible for pulling. + Returns the tags deleted, if any. Returns None on error. + """ + deleted_tags = oci.tag.delete_tags_for_manifest(manifest._db_id) + return [Tag.for_tag(tag) for tag in deleted_tags] + + def change_repository_tag_expiration(self, tag, expiration_date): + """ Sets the expiration date of the tag under the matching repository to that given. If the + expiration date is None, then the tag will not expire. Returns a tuple of the previous + expiration timestamp in seconds (if any), and whether the operation succeeded. + """ + return oci.tag.change_tag_expiration(tag._db_id, expiration_date) + + def get_legacy_images_owned_by_tag(self, tag): + """ Returns all legacy images *solely owned and used* by the given tag. """ + tag_obj = oci.tag.get_tag_by_id(tag._db_id) + if tag_obj is None: + return None + + tags = oci.tag.list_alive_tags(tag_obj.repository_id) + legacy_images = oci.tag.get_legacy_images_for_tags(tags) + + tag_legacy_image = legacy_images.get(tag._db_id) + if tag_legacy_image is None: + return None + + assert isinstance(tag_legacy_image, Image) + + # Collect the IDs of all images that the tag uses. + tag_image_ids = set() + tag_image_ids.add(tag_legacy_image.id) + tag_image_ids.update(tag_legacy_image.ancestor_id_list()) + + # Remove any images shared by other tags. + for current in tags: + if current == tag_obj: + continue + + current_image = legacy_images.get(current.id) + if current_image is None: + continue + + tag_image_ids.discard(current_image.id) + tag_image_ids = tag_image_ids.difference(current_image.ancestor_id_list()) + if not tag_image_ids: + return [] + + if not tag_image_ids: + return [] + + # Load the images we need to return. 
+ images = database.Image.select().where(database.Image.id << list(tag_image_ids)) + all_image_ids = set() + for image in images: + all_image_ids.add(image.id) + all_image_ids.update(image.ancestor_id_list()) + + # Build a map of all the images and their parents. + images_map = {} + all_images = database.Image.select().where(database.Image.id << list(all_image_ids)) + for image in all_images: + images_map[image.id] = image + + return [LegacyImage.for_image(image, images_map=images_map) for image in images] + + def get_security_status(self, manifest_or_legacy_image): + """ Returns the security status for the given manifest or legacy image or None if none. """ + image = None + + if isinstance(manifest_or_legacy_image, Manifest): + image = oci.shared.get_legacy_image_for_manifest(manifest_or_legacy_image._db_id) + if image is None: + return None + else: + try: + image = database.Image.get(id=manifest_or_legacy_image._db_id) + except database.Image.DoesNotExist: + return None + + if image.security_indexed_engine is not None and image.security_indexed_engine >= 0: + return SecurityScanStatus.SCANNED if image.security_indexed else SecurityScanStatus.FAILED + + return SecurityScanStatus.QUEUED + + def backfill_manifest_for_tag(self, tag): + """ Backfills a manifest for the V1 tag specified. + If a manifest already exists for the tag, returns that manifest. + + NOTE: This method will only be necessary until we've completed the backfill, at which point + it should be removed. + """ + # Nothing to do for OCI tags. + manifest = tag.manifest + if manifest is None: + return None + + legacy_image = oci.shared.get_legacy_image_for_manifest(manifest) + return Manifest.for_manifest(manifest, LegacyImage.for_image(legacy_image)) + + def list_manifest_layers(self, manifest, include_placements=False): + """ Returns an *ordered list* of the layers found in the manifest, starting at the base and + working towards the leaf, including the associated Blob and its placements (if specified). 
+ Returns None if the manifest could not be parsed and validated. + """ + try: + manifest_obj = database.Manifest.get(id=manifest._db_id) + except database.Manifest.DoesNotExist: + logger.exception('Could not find manifest for manifest `%s`', manifest._db_id) + return None + + return self._list_manifest_layers(manifest, manifest_obj.repository_id, include_placements) + + def lookup_derived_image(self, manifest, verb, varying_metadata=None, include_placements=False): + """ + Looks up the derived image for the given manifest, verb and optional varying metadata and + returns it or None if none. + """ + legacy_image = oci.shared.get_legacy_image_for_manifest(manifest._db_id) + if legacy_image is None: + return None + + derived = model.image.find_derived_storage_for_image(legacy_image, verb, varying_metadata) + return self._build_derived(derived, verb, varying_metadata, include_placements) + + def lookup_or_create_derived_image(self, manifest, verb, storage_location, varying_metadata=None, + include_placements=False): + """ + Looks up the derived image for the given manifest, verb and optional varying metadata + and returns it. If none exists, a new derived image is created. + """ + legacy_image = oci.shared.get_legacy_image_for_manifest(manifest._db_id) + if legacy_image is None: + return None + + derived = model.image.find_or_create_derived_storage(legacy_image, verb, storage_location, + varying_metadata) + return self._build_derived(derived, verb, varying_metadata, include_placements) + + def set_tags_expiration_for_manifest(self, manifest, expiration_sec): + """ + Sets the expiration on all tags that point to the given manifest to that specified. 
+ """ + oci.tag.set_tag_expiration_sec_for_manifest(manifest._db_id, expiration_sec) + + +oci_model = OCIModel() diff --git a/data/registry_model/registry_pre_oci_model.py b/data/registry_model/registry_pre_oci_model.py index 1e0b4065a..b70045b5f 100644 --- a/data/registry_model/registry_pre_oci_model.py +++ b/data/registry_model/registry_pre_oci_model.py @@ -155,60 +155,23 @@ class PreOCIModel(SharedModel, RegistryDataInterface): manifest = Manifest.for_tag_manifest(tag_manifest) # Save the labels on the manifest. + repo_tag = tag_manifest.tag if newly_created: + has_labels = False with self.batch_create_manifest_labels(manifest) as add_label: + if add_label is None: + return None, None + for key, value in manifest_interface_instance.layers[-1].v1_metadata.labels.iteritems(): media_type = 'application/json' if is_json(value) else 'text/plain' add_label(key, value, 'manifest', media_type) + has_labels = True - return manifest, Tag.for_repository_tag(tag_manifest.tag) + # Reload the tag in case any updates were applied. + if has_labels: + repo_tag = database.RepositoryTag.get(id=repo_tag.id) - def get_legacy_images(self, repository_ref): - """ - Returns an iterator of all the LegacyImage's defined in the matching repository. - """ - repo = model.repository.lookup_repository(repository_ref._db_id) - if repo is None: - return None - - all_images = model.image.get_repository_images_without_placements(repo) - all_images_map = {image.id: image for image in all_images} - - all_tags = model.tag.list_repository_tags(repo.namespace_user.username, repo.name) - tags_by_image_id = defaultdict(list) - for tag in all_tags: - tags_by_image_id[tag.image_id].append(tag) - - return [LegacyImage.for_image(image, images_map=all_images_map, tags_map=tags_by_image_id) - for image in all_images] - - def get_legacy_image(self, repository_ref, docker_image_id, include_parents=False, - include_blob=False): - """ - Returns the matching LegacyImages under the matching repository, if any. 
If none, - returns None. - """ - repo = model.repository.lookup_repository(repository_ref._db_id) - if repo is None: - return None - - image = model.image.get_image(repository_ref._db_id, docker_image_id) - if image is None: - return None - - parent_images_map = None - if include_parents: - parent_images = model.image.get_parent_images(repo.namespace_user.username, repo.name, image) - parent_images_map = {image.id: image for image in parent_images} - - blob = None - if include_blob: - placements = list(model.storage.get_storage_locations(image.storage.uuid)) - blob = Blob.for_image_storage(image.storage, - storage_path=model.storage.get_layer_path(image.storage), - placements=placements) - - return LegacyImage.for_image(image, images_map=parent_images_map, blob=blob) + return manifest, Tag.for_repository_tag(repo_tag) def create_manifest_label(self, manifest, key, value, source_type_name, media_type_name=None): """ Creates a label on the manifest with the given key and value. """ @@ -471,10 +434,6 @@ class PreOCIModel(SharedModel, RegistryDataInterface): NOTE: This method will only be necessary until we've completed the backfill, at which point it should be removed. """ - import features - - from app import app, docker_v2_signing_key - # Ensure that there isn't already a manifest for the tag. tag_manifest = model.tag.get_tag_manifest(tag._db_id) if tag_manifest is not None: @@ -492,29 +451,8 @@ class PreOCIModel(SharedModel, RegistryDataInterface): namespace_name = repo.namespace_user.username repo_name = repo.name - # Find the v1 metadata for this image and its parents. - repo_image = tag_obj.image - parents = model.image.get_parent_images(namespace_name, repo_name, repo_image) - - # If the manifest is being generated under the library namespace, then we make its namespace - # empty. 
- manifest_namespace = namespace_name - if features.LIBRARY_SUPPORT and namespace_name == app.config['LIBRARY_NAMESPACE']: - manifest_namespace = '' - - # Create and populate the manifest builder - builder = DockerSchema1ManifestBuilder(manifest_namespace, repo_name, tag.name) - - # Add the leaf layer - builder.add_layer(repo_image.storage.content_checksum, repo_image.v1_json_metadata) - - for parent_image in parents: - builder.add_layer(parent_image.storage.content_checksum, parent_image.v1_json_metadata) - - # Sign the manifest with our signing key. - manifest = builder.build(docker_v2_signing_key) - # Write the manifest to the DB. + manifest = self._build_manifest_for_legacy_image(tag_obj.name, tag_obj.image) blob_query = model.storage.lookup_repo_storages_by_content_checksum(repo, manifest.checksums) @@ -533,42 +471,13 @@ class PreOCIModel(SharedModel, RegistryDataInterface): working towards the leaf, including the associated Blob and its placements (if specified). Returns None if the manifest could not be parsed and validated. 
""" - try: - parsed = manifest.get_parsed_manifest() - except ManifestException: - logger.exception('Could not parse and validate manifest `%s`', manifest._db_id) - return None - try: tag_manifest = database.TagManifest.get(id=manifest._db_id) except database.TagManifest.DoesNotExist: logger.exception('Could not find tag manifest for manifest `%s`', manifest._db_id) return None - repo = tag_manifest.tag.repository - blob_query = model.storage.lookup_repo_storages_by_content_checksum(repo, parsed.checksums) - storage_map = {blob.content_checksum: blob for blob in blob_query} - - manifest_layers = [] - for layer in parsed.layers: - digest_str = str(layer.digest) - if digest_str not in storage_map: - logger.error('Missing digest `%s` for manifest `%s`', layer.digest, manifest._db_id) - return None - - image_storage = storage_map[digest_str] - assert image_storage.cas_path is not None - - placements = None - if include_placements: - placements = list(model.storage.get_storage_locations(image_storage.uuid)) - - blob = Blob.for_image_storage(image_storage, - storage_path=model.storage.get_layer_path(image_storage), - placements=placements) - manifest_layers.append(ManifestLayer(layer, blob)) - - return manifest_layers + return self._list_manifest_layers(manifest, tag_manifest.tag.repository_id, include_placements) def lookup_derived_image(self, manifest, verb, varying_metadata=None, include_placements=False): """ @@ -602,21 +511,6 @@ class PreOCIModel(SharedModel, RegistryDataInterface): varying_metadata) return self._build_derived(derived, verb, varying_metadata, include_placements) - def _build_derived(self, derived, verb, varying_metadata, include_placements): - if derived is None: - return None - - derived_storage = derived.derivative - placements = None - if include_placements: - placements = list(model.storage.get_storage_locations(derived_storage.uuid)) - - blob = Blob.for_image_storage(derived_storage, - storage_path=model.storage.get_layer_path(derived_storage), 
- placements=placements) - - return DerivedImage.for_derived_storage(derived, verb, varying_metadata, blob) - def set_tags_expiration_for_manifest(self, manifest, expiration_sec): """ Sets the expiration on all tags that point to the given manifest to that specified. diff --git a/data/registry_model/shared.py b/data/registry_model/shared.py index c226b27e8..8fc23fff2 100644 --- a/data/registry_model/shared.py +++ b/data/registry_model/shared.py @@ -1,11 +1,15 @@ # pylint: disable=protected-access import logging +from collections import defaultdict + from data import database from data import model from data.cache import cache_key from data.registry_model.datatype import FromDictionaryException -from data.registry_model.datatypes import RepositoryReference, Blob, TorrentInfo, BlobUpload +from data.registry_model.datatypes import (RepositoryReference, Blob, TorrentInfo, BlobUpload, + LegacyImage, ManifestLayer, DerivedImage) +from image.docker.schema1 import ManifestException, DockerSchema1ManifestBuilder logger = logging.getLogger(__name__) @@ -254,3 +258,130 @@ class SharedModel: storage = model.blob.temp_link_blob(namespace_name, repo_name, blob.digest, expiration_sec) return bool(storage) + + def get_legacy_images(self, repository_ref): + """ + Returns an iterator of all the LegacyImage's defined in the matching repository. 
+ """ + repo = model.repository.lookup_repository(repository_ref._db_id) + if repo is None: + return None + + all_images = model.image.get_repository_images_without_placements(repo) + all_images_map = {image.id: image for image in all_images} + + all_tags = model.tag.list_repository_tags(repo.namespace_user.username, repo.name) + tags_by_image_id = defaultdict(list) + for tag in all_tags: + tags_by_image_id[tag.image_id].append(tag) + + return [LegacyImage.for_image(image, images_map=all_images_map, tags_map=tags_by_image_id) + for image in all_images] + + def get_legacy_image(self, repository_ref, docker_image_id, include_parents=False, + include_blob=False): + """ + Returns the matching LegacyImages under the matching repository, if any. If none, + returns None. + """ + repo = model.repository.lookup_repository(repository_ref._db_id) + if repo is None: + return None + + image = model.image.get_image(repository_ref._db_id, docker_image_id) + if image is None: + return None + + parent_images_map = None + if include_parents: + parent_images = model.image.get_parent_images(repo.namespace_user.username, repo.name, image) + parent_images_map = {image.id: image for image in parent_images} + + blob = None + if include_blob: + placements = list(model.storage.get_storage_locations(image.storage.uuid)) + blob = Blob.for_image_storage(image.storage, + storage_path=model.storage.get_layer_path(image.storage), + placements=placements) + + return LegacyImage.for_image(image, images_map=parent_images_map, blob=blob) + + def _list_manifest_layers(self, manifest, repo_id, include_placements=False): + """ Returns an *ordered list* of the layers found in the manifest, starting at the base and + working towards the leaf, including the associated Blob and its placements (if specified). + Returns None if the manifest could not be parsed and validated. 
+ """ + try: + parsed = manifest.get_parsed_manifest() + except ManifestException: + logger.exception('Could not parse and validate manifest `%s`', manifest._db_id) + return None + + blob_query = model.storage.lookup_repo_storages_by_content_checksum(repo_id, parsed.checksums) + storage_map = {blob.content_checksum: blob for blob in blob_query} + + manifest_layers = [] + for layer in parsed.layers: + digest_str = str(layer.digest) + if digest_str not in storage_map: + logger.error('Missing digest `%s` for manifest `%s`', layer.digest, manifest._db_id) + return None + + image_storage = storage_map[digest_str] + assert image_storage.cas_path is not None + + placements = None + if include_placements: + placements = list(model.storage.get_storage_locations(image_storage.uuid)) + + blob = Blob.for_image_storage(image_storage, + storage_path=model.storage.get_layer_path(image_storage), + placements=placements) + manifest_layers.append(ManifestLayer(layer, blob)) + + return manifest_layers + + def _build_derived(self, derived, verb, varying_metadata, include_placements): + if derived is None: + return None + + derived_storage = derived.derivative + placements = None + if include_placements: + placements = list(model.storage.get_storage_locations(derived_storage.uuid)) + + blob = Blob.for_image_storage(derived_storage, + storage_path=model.storage.get_layer_path(derived_storage), + placements=placements) + + return DerivedImage.for_derived_storage(derived, verb, varying_metadata, blob) + + def _build_manifest_for_legacy_image(self, tag_name, legacy_image_row): + import features + + from app import app, docker_v2_signing_key + + repo = legacy_image_row.repository + namespace_name = repo.namespace_user.username + repo_name = repo.name + + # Find the v1 metadata for this image and its parents. 
+ parents = model.image.get_parent_images(namespace_name, repo_name, legacy_image_row) + + # If the manifest is being generated under the library namespace, then we make its namespace + # empty. + manifest_namespace = namespace_name + if features.LIBRARY_SUPPORT and namespace_name == app.config['LIBRARY_NAMESPACE']: + manifest_namespace = '' + + # Create and populate the manifest builder + builder = DockerSchema1ManifestBuilder(manifest_namespace, repo_name, tag_name) + + # Add the leaf layer + builder.add_layer(legacy_image_row.storage.content_checksum, legacy_image_row.v1_json_metadata) + + for parent_image in parents: + builder.add_layer(parent_image.storage.content_checksum, parent_image.v1_json_metadata) + + # Sign the manifest with our signing key. + return builder.build(docker_v2_signing_key) diff --git a/data/registry_model/test/test_interface.py b/data/registry_model/test/test_interface.py index aa9f35d81..0c75d06b7 100644 --- a/data/registry_model/test/test_interface.py +++ b/data/registry_model/test/test_interface.py @@ -1,4 +1,5 @@ import hashlib +import json import uuid from datetime import datetime, timedelta @@ -16,16 +17,21 @@ from data.database import (TagManifestLabelMap, TagManifestToManifest, Manifest, TorrentInfo, Tag, TagToRepositoryTag, close_db_filter) from data.cache.impl import InMemoryDataModelCache from data.registry_model.registry_pre_oci_model import PreOCIModel +from data.registry_model.registry_oci_model import OCIModel from data.registry_model.datatypes import RepositoryReference from image.docker.schema1 import DockerSchema1ManifestBuilder from test.fixtures import * -@pytest.fixture(params=[PreOCIModel]) +@pytest.fixture(params=[PreOCIModel, OCIModel]) def registry_model(request, initialized_db): return request.param() +@pytest.fixture() +def pre_oci_model(initialized_db): + return PreOCIModel() + @pytest.mark.parametrize('names, expected', [ (['unknown'], None), @@ -83,10 +89,11 @@ def test_lookup_manifests(repo_namespace, 
repo_name, registry_model): found_tag = registry_model.find_matching_tag(repository_ref, ['latest']) found_manifest = registry_model.get_manifest_for_tag(found_tag) found = registry_model.lookup_manifest_by_digest(repository_ref, found_manifest.digest, - include_legacy_image=True) + include_legacy_image=True) assert found._db_id == found_manifest._db_id assert found.digest == found_manifest.digest assert found.legacy_image + assert found.legacy_image.parents def test_lookup_unknown_manifest(registry_model): @@ -110,11 +117,11 @@ def test_legacy_images(repo_namespace, repo_name, registry_model): found_tags = set() for image in legacy_images: found_image = registry_model.get_legacy_image(repository_ref, image.docker_image_id, - include_parents=True) + include_parents=True) with assert_query_count(5 if found_image.parents else 4): found_image = registry_model.get_legacy_image(repository_ref, image.docker_image_id, - include_parents=True, include_blob=True) + include_parents=True, include_blob=True) assert found_image.docker_image_id == image.docker_image_id assert found_image.parents == image.parents assert found_image.blob @@ -132,7 +139,7 @@ def test_legacy_images(repo_namespace, repo_name, registry_model): # Try without parents and ensure it raises an exception. 
found_image = registry_model.get_legacy_image(repository_ref, image.docker_image_id, - include_parents=False) + include_parents=False) with pytest.raises(Exception): assert not found_image.parents @@ -211,23 +218,19 @@ def test_batch_labels(registry_model): ]) def test_repository_tags(repo_namespace, repo_name, registry_model): repository_ref = registry_model.lookup_repository(repo_namespace, repo_name) - - with assert_query_count(1): - tags = registry_model.list_repository_tags(repository_ref, include_legacy_images=True) - assert len(tags) + tags = registry_model.list_repository_tags(repository_ref, include_legacy_images=True) + assert len(tags) for tag in tags: - with assert_query_count(2): - found_tag = registry_model.get_repo_tag(repository_ref, tag.name, include_legacy_image=True) - assert found_tag == tag + found_tag = registry_model.get_repo_tag(repository_ref, tag.name, include_legacy_image=True) + assert found_tag == tag if found_tag.legacy_image is None: continue - with assert_query_count(2): - found_image = registry_model.get_legacy_image(repository_ref, - found_tag.legacy_image.docker_image_id) - assert found_image == found_tag.legacy_image + found_image = registry_model.get_legacy_image(repository_ref, + found_tag.legacy_image.docker_image_id) + assert found_image == found_tag.legacy_image def test_repository_tag_history(registry_model): @@ -295,15 +298,15 @@ def test_retarget_tag_history(use_manifest, registry_model): if use_manifest: manifest_or_legacy_image = registry_model.lookup_manifest_by_digest(repository_ref, - history[1].manifest_digest, - allow_dead=True) + history[0].manifest_digest, + allow_dead=True) else: - manifest_or_legacy_image = history[1].legacy_image + manifest_or_legacy_image = history[0].legacy_image # Retarget the tag. assert manifest_or_legacy_image updated_tag = registry_model.retarget_tag(repository_ref, 'latest', manifest_or_legacy_image, - is_reversion=True) + is_reversion=True) # Ensure the tag has changed targets. 
if use_manifest: @@ -316,23 +319,6 @@ def test_retarget_tag_history(use_manifest, registry_model): assert len(new_history) == len(history) + 1 -def test_retarget_tag(registry_model): - repository_ref = registry_model.lookup_repository('devtable', 'complex') - history, _ = registry_model.list_repository_tag_history(repository_ref) - - prod_tag = registry_model.get_repo_tag(repository_ref, 'prod', include_legacy_image=True) - - # Retarget the tag. - updated_tag = registry_model.retarget_tag(repository_ref, 'latest', prod_tag.legacy_image) - - # Ensure the tag has changed targets. - assert updated_tag.legacy_image == prod_tag.legacy_image - - # Ensure history has been updated. - new_history, _ = registry_model.list_repository_tag_history(repository_ref) - assert len(new_history) == len(history) + 1 - - def test_change_repository_tag_expiration(registry_model): repository_ref = registry_model.lookup_repository('devtable', 'simple') tag = registry_model.get_repo_tag(repository_ref, 'latest') @@ -399,24 +385,24 @@ def clear_rows(initialized_db): ('devtable', 'history'), ('buynlarge', 'orgrepo'), ]) -def test_backfill_manifest_for_tag(repo_namespace, repo_name, clear_rows, registry_model): - repository_ref = registry_model.lookup_repository(repo_namespace, repo_name) - tags = registry_model.list_repository_tags(repository_ref) +def test_backfill_manifest_for_tag(repo_namespace, repo_name, clear_rows, pre_oci_model): + repository_ref = pre_oci_model.lookup_repository(repo_namespace, repo_name) + tags = pre_oci_model.list_repository_tags(repository_ref) assert tags for tag in tags: assert not tag.manifest_digest - assert registry_model.backfill_manifest_for_tag(tag) + assert pre_oci_model.backfill_manifest_for_tag(tag) - tags = registry_model.list_repository_tags(repository_ref, include_legacy_images=True) + tags = pre_oci_model.list_repository_tags(repository_ref, include_legacy_images=True) assert tags for tag in tags: assert tag.manifest_digest - manifest = 
registry_model.get_manifest_for_tag(tag) + manifest = pre_oci_model.get_manifest_for_tag(tag) assert manifest - legacy_image = registry_model.get_legacy_image(repository_ref, tag.legacy_image.docker_image_id, + legacy_image = pre_oci_model.get_legacy_image(repository_ref, tag.legacy_image.docker_image_id, include_parents=True) parsed_manifest = manifest.get_parsed_manifest() @@ -430,19 +416,19 @@ def test_backfill_manifest_for_tag(repo_namespace, repo_name, clear_rows, regist ('devtable', 'history'), ('buynlarge', 'orgrepo'), ]) -def test_backfill_manifest_on_lookup(repo_namespace, repo_name, clear_rows, registry_model): - repository_ref = registry_model.lookup_repository(repo_namespace, repo_name) - tags = registry_model.list_repository_tags(repository_ref) +def test_backfill_manifest_on_lookup(repo_namespace, repo_name, clear_rows, pre_oci_model): + repository_ref = pre_oci_model.lookup_repository(repo_namespace, repo_name) + tags = pre_oci_model.list_repository_tags(repository_ref) assert tags for tag in tags: assert not tag.manifest_digest - assert not registry_model.get_manifest_for_tag(tag) + assert not pre_oci_model.get_manifest_for_tag(tag) - manifest = registry_model.get_manifest_for_tag(tag, backfill_if_necessary=True) + manifest = pre_oci_model.get_manifest_for_tag(tag, backfill_if_necessary=True) assert manifest - updated_tag = registry_model.get_repo_tag(repository_ref, tag.name) + updated_tag = pre_oci_model.get_repo_tag(repository_ref, tag.name) assert updated_tag.manifest_digest == manifest.digest @@ -471,9 +457,8 @@ def test_list_manifest_layers(repo_namespace, repo_name, registry_model): manifest = registry_model.get_manifest_for_tag(tag) assert manifest - with assert_query_count(4): - layers = registry_model.list_manifest_layers(manifest) - assert layers + layers = registry_model.list_manifest_layers(manifest) + assert layers layers = registry_model.list_manifest_layers(manifest, include_placements=True) assert layers @@ -522,7 +507,7 @@ def 
test_derived_image(registry_model): assert registry_model.lookup_derived_image(manifest, 'squash', {'foo': 'bar'}) is None squashed_foo = registry_model.lookup_or_create_derived_image(manifest, 'squash', 'local_us', - {'foo': 'bar'}) + {'foo': 'bar'}) assert squashed_foo != squashed assert registry_model.lookup_derived_image(manifest, 'squash', {'foo': 'bar'}) == squashed_foo @@ -530,7 +515,7 @@ def test_derived_image(registry_model): # Lookup with placements. squashed = registry_model.lookup_or_create_derived_image(manifest, 'squash', 'local_us', {}, - include_placements=True) + include_placements=True) assert squashed.blob.placements # Delete the derived image. @@ -712,8 +697,8 @@ def test_create_manifest_and_retarget_tag(registry_model): assert sample_manifest is not None another_manifest, tag = registry_model.create_manifest_and_retarget_tag(repository_ref, - sample_manifest, - 'anothertag') + sample_manifest, + 'anothertag') assert another_manifest is not None assert tag is not None @@ -722,3 +707,38 @@ def test_create_manifest_and_retarget_tag(registry_model): layers = registry_model.list_manifest_layers(another_manifest) assert len(layers) == 1 + + +def test_create_manifest_and_retarget_tag_with_labels(registry_model): + repository_ref = registry_model.lookup_repository('devtable', 'simple') + latest_tag = registry_model.get_repo_tag(repository_ref, 'latest', include_legacy_image=True) + manifest = registry_model.get_manifest_for_tag(latest_tag).get_parsed_manifest() + + json_metadata = { + 'id': latest_tag.legacy_image.docker_image_id, + 'config': { + 'Labels': { + 'quay.expires-after': '2w', + }, + }, + } + + builder = DockerSchema1ManifestBuilder('devtable', 'simple', 'anothertag') + builder.add_layer(manifest.blob_digests[0], json.dumps(json_metadata)) + sample_manifest = builder.build(docker_v2_signing_key) + assert sample_manifest is not None + + another_manifest, tag = registry_model.create_manifest_and_retarget_tag(repository_ref, + sample_manifest, 
+ 'anothertag') + assert another_manifest is not None + assert tag is not None + + assert tag.name == 'anothertag' + assert another_manifest.get_parsed_manifest().manifest_dict == sample_manifest.manifest_dict + + layers = registry_model.list_manifest_layers(another_manifest) + assert len(layers) == 1 + + # Ensure the labels were applied. + assert tag.lifetime_end_ms is not None diff --git a/data/registry_model/test/test_manifestbuilder.py b/data/registry_model/test/test_manifestbuilder.py index f1783fc95..381ac9ea3 100644 --- a/data/registry_model/test/test_manifestbuilder.py +++ b/data/registry_model/test/test_manifestbuilder.py @@ -10,15 +10,16 @@ from mock import patch from data.registry_model.blobuploader import BlobUploadSettings, upload_blob from data.registry_model.manifestbuilder import create_manifest_builder, lookup_manifest_builder from data.registry_model.registry_pre_oci_model import PreOCIModel +from data.registry_model.registry_oci_model import OCIModel from storage.distributedstorage import DistributedStorage from storage.fakestorage import FakeStorage from test.fixtures import * -@pytest.fixture() -def pre_oci_model(initialized_db): - return PreOCIModel() +@pytest.fixture(params=[PreOCIModel, OCIModel]) +def registry_model(request, initialized_db): + return request.param() @pytest.fixture() @@ -33,8 +34,8 @@ def fake_session(): ('someid', 'parentid', 'some data')], id='Multi layer'), ]) -def test_build_manifest(layers, fake_session, pre_oci_model): - repository_ref = pre_oci_model.lookup_repository('devtable', 'complex') +def test_build_manifest(layers, fake_session, registry_model): + repository_ref = registry_model.lookup_repository('devtable', 'complex') storage = DistributedStorage({'local_us': FakeStorage(None)}, ['local_us']) settings = BlobUploadSettings('2M', 512 * 1024, 3600) app_config = {'TESTING': True} @@ -67,13 +68,13 @@ def test_build_manifest(layers, fake_session, pre_oci_model): assert tag in builder.committed_tags # Verify the 
legacy image for the tag. - found = pre_oci_model.get_repo_tag(repository_ref, 'somenewtag', include_legacy_image=True) + found = registry_model.get_repo_tag(repository_ref, 'somenewtag', include_legacy_image=True) assert found assert found.name == 'somenewtag' assert found.legacy_image.docker_image_id == layers[-1][0] # Verify the blob and manifest. - manifest = pre_oci_model.get_manifest_for_tag(found) + manifest = registry_model.get_manifest_for_tag(found) assert manifest parsed = manifest.get_parsed_manifest() @@ -87,8 +88,8 @@ def test_build_manifest(layers, fake_session, pre_oci_model): assert parsed.leaf_layer_v1_image_id == layers[-1][0] -def test_build_manifest_missing_parent(fake_session, pre_oci_model): - repository_ref = pre_oci_model.lookup_repository('devtable', 'complex') +def test_build_manifest_missing_parent(fake_session, registry_model): + repository_ref = registry_model.lookup_repository('devtable', 'complex') builder = create_manifest_builder(repository_ref) assert builder.start_layer('somelayer', json.dumps({'id': 'somelayer', 'parent': 'someparent'}), diff --git a/endpoints/api/tag.py b/endpoints/api/tag.py index f7fe496d4..8f026084c 100644 --- a/endpoints/api/tag.py +++ b/endpoints/api/tag.py @@ -217,6 +217,7 @@ class RepositoryTagImages(RepositoryParamResource): all_images = None if parsed_args['owned']: + # TODO(jschorr): Remove the `owned` image concept once we are fully on V2_2. 
all_images = registry_model.get_legacy_images_owned_by_tag(tag_ref) else: image_with_parents = registry_model.get_legacy_image(repo_ref, image_id, include_parents=True) diff --git a/endpoints/v2/test/test_manifest.py b/endpoints/v2/test/test_manifest.py index b16278e01..960501052 100644 --- a/endpoints/v2/test/test_manifest.py +++ b/endpoints/v2/test/test_manifest.py @@ -52,4 +52,4 @@ def test_e2e_query_count_manifest_norewrite(client, app): conduct_call(client, 'v2.write_manifest_by_digest', url_for, 'PUT', params, expected_code=202, headers=headers, raw_body=tag_manifest.json_data) - assert counter.count <= 25 + assert counter.count <= 27 diff --git a/initdb.py b/initdb.py index 2294bbfb4..4ea4de50f 100644 --- a/initdb.py +++ b/initdb.py @@ -21,10 +21,11 @@ from data.database import (db, all_models, Role, TeamRole, Visibility, LoginServ ServiceKeyApprovalType, MediaType, LabelSourceType, UserPromptKind, RepositoryKind, User, DisableReason, DeletedNamespace, appr_classes, ApprTagKind, ApprBlobPlacementLocation, Repository, TagKind, - ManifestChild) + ManifestChild, TagToRepositoryTag, get_epoch_timestamp_ms) from data import model from data.queue import WorkQueue from data.registry_model import registry_model +from data.registry_model.registry_pre_oci_model import pre_oci_model from app import app, storage as store, tf from storage.basestorage import StoragePaths from image.docker.schema1 import DOCKER_SCHEMA1_CONTENT_TYPES @@ -136,23 +137,25 @@ def __create_subtree(with_storage, repo, structure, creator_username, parent, ta repo_ref = registry_model.lookup_repository(repo.namespace_user.username, repo.name) for tag_name in last_node_tags: - new_tag = model.tag.create_or_update_tag(repo.namespace_user.username, repo.name, tag_name, - new_image.docker_image_id) + adjusted_tag_name = tag_name + now_ms = None + if tag_name[0] == '#': + adjusted_tag_name = tag_name[1:] + now_ms = get_epoch_timestamp_ms() - 1000 + + new_tag = 
model.tag.create_or_update_tag(repo.namespace_user.username, repo.name, + adjusted_tag_name, + new_image.docker_image_id, + now_ms=now_ms) + derived = model.image.find_or_create_derived_storage(new_tag, 'squash', 'local_us') model.storage.find_or_create_storage_signature(derived, 'gpg2') - tag = registry_model.get_repo_tag(repo_ref, tag_name) - registry_model.backfill_manifest_for_tag(tag) + tag = pre_oci_model.get_repo_tag(repo_ref, adjusted_tag_name) + assert tag._db_id == new_tag.id + assert pre_oci_model.backfill_manifest_for_tag(tag) tag_map[tag_name] = new_tag - for tag_name in last_node_tags: - if tag_name[0] == '#': - found_tag = tag_map[tag_name] - found_tag.name = tag_name[1:] - found_tag.lifetime_end_ts = tag_map[tag_name[1:]].lifetime_start_ts - found_tag.lifetime_start_ts = found_tag.lifetime_end_ts - 10 - found_tag.save() - for subtree in subtrees: __create_subtree(with_storage, repo, subtree, creator_username, new_image, tag_map) @@ -592,7 +595,7 @@ def populate_database(minimal=False, with_storage=False): (1, [], None)], None)], None)) __generate_repository(with_storage, new_user_1, 'history', 'Historical repository.', False, - [], (4, [(2, [], 'latest'), (3, [], '#latest')], None)) + [], (4, [(2, [], '#latest'), (3, [], 'latest')], None)) __generate_repository(with_storage, new_user_1, 'complex', 'Complex repository with many branches and tags.', diff --git a/test/registry/protocol_fixtures.py b/test/registry/protocol_fixtures.py index 7455aa811..5a07cac2b 100644 --- a/test/registry/protocol_fixtures.py +++ b/test/registry/protocol_fixtures.py @@ -23,6 +23,20 @@ def basic_images(): ] + +@pytest.fixture(scope="session") +def different_images(): + """ Returns different basic images for push and pull testing. """ + # Note: order is from base layer down to leaf. 
+ parent_bytes = layer_bytes_for_contents('different parent contents') + image_bytes = layer_bytes_for_contents('some different contents') + return [ + Image(id='anotherparentid', bytes=parent_bytes, parent_id=None), + Image(id='anothersomeid', bytes=image_bytes, parent_id='anotherparentid'), + ] + + + @pytest.fixture(scope="session") def sized_images(): """ Returns basic images (with sizes) for push and pull testing. """ diff --git a/test/registry/registry_tests.py b/test/registry/registry_tests.py index dac558872..a9aaaa4d4 100644 --- a/test/registry/registry_tests.py +++ b/test/registry/registry_tests.py @@ -1,4 +1,5 @@ # pylint: disable=W0401, W0621, W0613, W0614, R0913 +import os import hashlib import tarfile @@ -51,7 +52,30 @@ def test_multi_layer_images_push_pull(pusher, puller, multi_layer_images, livese credentials=credentials) +def test_overwrite_tag(pusher, puller, basic_images, different_images, liveserver_session, + app_reloader): + """ Test: Basic push and pull of an image to a new repository, followed by a push to the same + tag with different images. """ + credentials = ('devtable', 'password') + # Push a new repository. + pusher.push(liveserver_session, 'devtable', 'newrepo', 'latest', basic_images, + credentials=credentials) + + # Pull the repository to verify. + puller.pull(liveserver_session, 'devtable', 'newrepo', 'latest', basic_images, + credentials=credentials) + + # Push a new repository. + pusher.push(liveserver_session, 'devtable', 'newrepo', 'latest', different_images, + credentials=credentials) + + # Pull the repository to verify. + puller.pull(liveserver_session, 'devtable', 'newrepo', 'latest', different_images, + credentials=credentials) + + +@pytest.mark.skipif(os.getenv('OCI_DATA_MODEL') == 'true', reason="no backfill in new model") def test_no_tag_manifests(pusher, puller, basic_images, liveserver_session, app_reloader, liveserver, registry_server_executor): """ Test: Basic pull without manifests. 
""" diff --git a/test/test_api_usage.py b/test/test_api_usage.py index bc19d5074..fab4026bf 100644 --- a/test/test_api_usage.py +++ b/test/test_api_usage.py @@ -2909,7 +2909,7 @@ class TestListAndDeleteTag(ApiTestCase): for i in xrange(1, 9): tag_name = "tag" + str(i) remaining_tags.add(tag_name) - registry_model.retarget_tag(repo_ref, tag_name, latest_tag.legacy_image) + assert registry_model.retarget_tag(repo_ref, tag_name, latest_tag.legacy_image) # Make sure we can iterate over all of them. json = self.getJsonResponse(ListRepositoryTags, params=dict(