From 0ae062be62f945967d4f8c05c5ec6e1d754ae857 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Thu, 20 Sep 2018 17:49:00 -0400 Subject: [PATCH] Add manifest creation to new registry data model interface --- data/model/tag.py | 11 ++ data/registry_model/interface.py | 19 ++++ data/registry_model/label_handlers.py | 28 +++++ data/registry_model/registry_pre_oci_model.py | 106 +++++++++++++++++- .../registry_model/test/test_pre_oci_model.py | 18 ++- image/docker/interfaces.py | 6 + image/docker/schema1.py | 4 + image/docker/schema2/manifest.py | 4 + workers/manifestbackfillworker.py | 4 + 9 files changed, 195 insertions(+), 5 deletions(-) create mode 100644 data/registry_model/label_handlers.py diff --git a/data/model/tag.py b/data/model/tag.py index 52fa1ba39..f87c99c70 100644 --- a/data/model/tag.py +++ b/data/model/tag.py @@ -1,6 +1,7 @@ import logging from calendar import timegm +from datetime import datetime from uuid import uuid4 from peewee import IntegrityError, JOIN, fn @@ -757,6 +758,16 @@ def change_repository_tag_expiration(namespace_name, repo_name, tag_name, expira return (None, False) +def set_tag_expiration_for_manifest(tag_manifest, expiration_sec): + """ + Changes the expiration of the tag that points to the given manifest to be its lifetime start + + the expiration seconds. + """ + expiration_time_in_seconds = tag_manifest.tag.lifetime_start_ts + expiration_sec + expiration_date = datetime.utcfromtimestamp(expiration_time_in_seconds) + return change_tag_expiration(tag_manifest.tag, expiration_date) + + def change_tag_expiration(tag, expiration_date): """ Changes the expiration of the given tag to the given expiration datetime. If the expiration datetime is None, then the tag is marked as not expiring. 
diff --git a/data/registry_model/interface.py b/data/registry_model/interface.py index f5949b538..099851d67 100644 --- a/data/registry_model/interface.py +++ b/data/registry_model/interface.py @@ -35,6 +35,19 @@ class RegistryDataInterface(object): """ Looks up the manifest with the given digest under the given repository and returns it or None if none. """ + @abstractmethod + def create_manifest_and_retarget_tag(self, repository_ref, manifest_interface_instance, tag_name): + """ Creates a manifest in a repository, adding all of the necessary data in the model. + + The `manifest_interface_instance` parameter must be an instance of the manifest + interface as returned by the image/docker package. + + Note that all blobs referenced by the manifest must exist under the repository or this + method will fail and return None. + + Returns a reference to the (created manifest, tag) or (None, None) on error. + """ + @abstractmethod def get_legacy_images(self, repository_ref): """ @@ -255,3 +268,9 @@ class RegistryDataInterface(object): operations if an existing blob from another repositroy is being pushed. Returns False if the mounting fails. """ + + @abstractmethod + def set_tags_expiration_for_manifest(self, manifest, expiration_sec): + """ + Sets the expiration on all tags that point to the given manifest to that specified. + """ diff --git a/data/registry_model/label_handlers.py b/data/registry_model/label_handlers.py new file mode 100644 index 000000000..07635537a --- /dev/null +++ b/data/registry_model/label_handlers.py @@ -0,0 +1,28 @@ +import logging + +from util.timedeltastring import convert_to_timedelta + +logger = logging.getLogger(__name__) + +def _expires_after(label_dict, manifest, model): + """ Sets the expiration of a manifest based on the quay.expires-after label. 
""" + try: + timedelta = convert_to_timedelta(label_dict['value']) + except ValueError: + logger.exception('Could not convert %s to timedeltastring', label_dict['value']) + return + + total_seconds = timedelta.total_seconds() + logger.debug('Labeling manifest %s with expiration of %s', manifest, total_seconds) + model.set_tags_expiration_for_manifest(manifest, total_seconds) + + +_LABEL_HANDLES = { + 'quay.expires-after': _expires_after, +} + +def apply_label_to_manifest(label_dict, manifest, model): + """ Runs the handler defined, if any, for the given label. """ + handler = _LABEL_HANDLES.get(label_dict['key']) + if handler is not None: + handler(label_dict, manifest, model) diff --git a/data/registry_model/registry_pre_oci_model.py b/data/registry_model/registry_pre_oci_model.py index 72ae699b5..4031d5eef 100644 --- a/data/registry_model/registry_pre_oci_model.py +++ b/data/registry_model/registry_pre_oci_model.py @@ -8,11 +8,15 @@ from peewee import IntegrityError from data import database from data import model +from data.database import db_transaction from data.registry_model.interface import RegistryDataInterface from data.registry_model.datatypes import (Tag, RepositoryReference, Manifest, LegacyImage, Label, SecurityScanStatus, ManifestLayer, Blob, DerivedImage, TorrentInfo, BlobUpload) -from image.docker.schema1 import DockerSchema1ManifestBuilder, ManifestException +from data.registry_model.label_handlers import apply_label_to_manifest +from image.docker.schema1 import (DockerSchema1ManifestBuilder, ManifestException, + DockerSchema1Manifest) +from util.validation import is_json logger = logging.getLogger(__name__) @@ -81,6 +85,75 @@ class PreOCIModel(RegistryDataInterface): return Manifest.for_tag_manifest(tag_manifest, legacy_image) + def create_manifest_and_retarget_tag(self, repository_ref, manifest_interface_instance, tag_name): + """ Creates a manifest in a repository, adding all of the necessary data in the model. 
+ + The `manifest_interface_instance` parameter must be an instance of the manifest + interface as returned by the image/docker package. + + Note that all blobs referenced by the manifest must exist under the repository or this + method will fail and return None. + + Returns a reference to the (created manifest, tag) or (None, None) on error. + """ + # NOTE: Only Schema1 is supported by the pre_oci_model. + assert isinstance(manifest_interface_instance, DockerSchema1Manifest) + if not manifest_interface_instance.layers: + return None, None + + # Ensure all the blobs in the manifest exist. + digests = manifest_interface_instance.checksums + query = model.storage.lookup_repo_storages_by_content_checksum(repository_ref._db_id, digests) + blob_map = {s.content_checksum: s.id for s in query} + for layer in manifest_interface_instance.layers: + digest_str = str(layer.digest) + if digest_str not in blob_map: + return None, None + + # Lookup all the images and their parent images (if any) inside the manifest. + # This will let us know which v1 images we need to synthesize and which ones are invalid. + docker_image_ids = list(manifest_interface_instance.legacy_image_ids) + images_query = model.image.lookup_repository_images(repository_ref._db_id, docker_image_ids) + images_map = {i.docker_image_id: i.storage for i in images_query} + + # Rewrite any v1 image IDs that do not match the checksum in the database. 
+ try: + rewritten_images = list(manifest_interface_instance.rewrite_invalid_image_ids(images_map)) + for rewritten_image in rewritten_images: + if not rewritten_image.image_id in images_map: + model.image.synthesize_v1_image( + repository_ref._db_id, + blob_map[rewritten_image.content_checksum], + rewritten_image.image_id, + rewritten_image.created, + rewritten_image.comment, + rewritten_image.command, + rewritten_image.compat_json, + rewritten_image.parent_image_id, + ) + except ManifestException: + logger.exception("exception when rewriting v1 metadata") + return None, None + + # Store the manifest pointing to the tag. + leaf_layer_id = rewritten_images[-1].image_id + tag_manifest, newly_created = model.tag.store_tag_manifest_for_repo(repository_ref._db_id, + tag_name, + manifest_interface_instance, + leaf_layer_id, + blob_map) + + manifest = Manifest.for_tag_manifest(tag_manifest) + + # Save the labels on the manifest. + if newly_created: + with self.batch_create_manifest_labels(manifest) as add_label: + for key, value in manifest.layers[-1].v1_metadata.labels.iteritems(): + media_type = 'application/json' if is_json(value) else 'text/plain' + add_label(key, value, 'manifest', media_type) + + return manifest, Tag.for_repository_tag(tag_manifest.tag) + def get_legacy_images(self, repository_ref): """ Returns an iterator of all the LegacyImage's defined in the matching repository. @@ -135,8 +208,17 @@ class PreOCIModel(RegistryDataInterface): except database.TagManifest.DoesNotExist: return None - label = model.label.create_manifest_label(tag_manifest, key, value, source_type_name, - media_type_name) + label_data = dict(key=key, value=value, source_type_name=source_type_name, + media_type_name=media_type_name) + + with db_transaction(): + # Create the label itself. + label = model.label.create_manifest_label(tag_manifest, key, value, source_type_name, + media_type_name) + + # Apply any changes to the manifest that the label prescribes. 
+ apply_label_to_manifest(label_data, manifest, self) + return Label.for_label(label) @contextmanager @@ -164,7 +246,12 @@ class PreOCIModel(RegistryDataInterface): # TODO: make this truly batch once we've fully transitioned to V2_2 and no longer need # the mapping tables. for label in labels_to_add: - model.label.create_manifest_label(tag_manifest, **label) + with db_transaction(): + # Create the label itself. + model.label.create_manifest_label(tag_manifest, **label) + + # Apply any changes to the manifest that the label prescribes. + apply_label_to_manifest(label, manifest, self) def list_manifest_labels(self, manifest, key_prefix=None): """ Returns all labels found on the manifest. If specified, the key_prefix will filter the @@ -708,4 +795,15 @@ class PreOCIModel(RegistryDataInterface): expiration_sec) return bool(storage) + def set_tags_expiration_for_manifest(self, manifest, expiration_sec): + """ + Sets the expiration on all tags that point to the given manifest to that specified. + """ + try: + tag_manifest = database.TagManifest.get(id=manifest._db_id) + except database.TagManifest.DoesNotExist: + return None + + model.tag.set_tag_expiration_for_manifest(tag_manifest, expiration_sec) + pre_oci_model = PreOCIModel() diff --git a/data/registry_model/test/test_pre_oci_model.py b/data/registry_model/test/test_pre_oci_model.py index 3acb75146..06a43aaba 100644 --- a/data/registry_model/test/test_pre_oci_model.py +++ b/data/registry_model/test/test_pre_oci_model.py @@ -167,6 +167,23 @@ def test_manifest_labels(pre_oci_model): assert created not in pre_oci_model.list_manifest_labels(found_manifest) +def test_manifest_label_handlers(pre_oci_model): + repo = model.repository.get_repository('devtable', 'simple') + repository_ref = RepositoryReference.for_repo_obj(repo) + found_tag = pre_oci_model.get_repo_tag(repository_ref, 'latest') + found_manifest = pre_oci_model.get_manifest_for_tag(found_tag) + + # Ensure the tag has no expiration. 
+ assert found_tag.lifetime_end_ts is None + + # Create a new label with an expires-after. + pre_oci_model.create_manifest_label(found_manifest, 'quay.expires-after', '2h', 'api') + + # Ensure the tag now has an expiration. + updated_tag = pre_oci_model.get_repo_tag(repository_ref, 'latest') + assert updated_tag.lifetime_end_ts == (updated_tag.lifetime_start_ts + (60 * 60 * 2)) + + def test_batch_labels(pre_oci_model): repo = model.repository.get_repository('devtable', 'history') repository_ref = RepositoryReference.for_repo_obj(repo) @@ -554,7 +571,6 @@ def test_torrent_info(pre_oci_model): assert torrent_info.pieces == 'foo' -<<<<<<< HEAD def test_blob_uploads(pre_oci_model): repository_ref = pre_oci_model.lookup_repository('devtable', 'simple') diff --git a/image/docker/interfaces.py b/image/docker/interfaces.py index 5ce12de93..62dd4563c 100644 --- a/image/docker/interfaces.py +++ b/image/docker/interfaces.py @@ -34,6 +34,12 @@ class ManifestInterface(object): """ Returns the Docker V1 image ID for the leaf (top) layer, if any, or None if none. """ pass + @abstractproperty + def legacy_image_ids(self): + """ Returns the Docker V1 image IDs for the layers of this manifest or None if not applicable. 
+ """ + pass + @abstractproperty def blob_digests(self): """ Returns an iterator over all the blob digests referenced by this manifest, diff --git a/image/docker/schema1.py b/image/docker/schema1.py index 4897f4926..f7184c43b 100644 --- a/image/docker/schema1.py +++ b/image/docker/schema1.py @@ -254,6 +254,10 @@ class DockerSchema1Manifest(ManifestInterface): def image_ids(self): return {mdata.v1_metadata.image_id for mdata in self.layers} + @property + def legacy_image_ids(self): + return {mdata.v1_metadata.image_id for mdata in self.layers} + @property def parent_image_ids(self): return {mdata.v1_metadata.parent_image_id for mdata in self.layers diff --git a/image/docker/schema2/manifest.py b/image/docker/schema2/manifest.py index f4e443f66..ceb07fa30 100644 --- a/image/docker/schema2/manifest.py +++ b/image/docker/schema2/manifest.py @@ -172,6 +172,10 @@ class DockerSchema2Manifest(ManifestInterface): def leaf_layer_v1_image_id(self): return list(self.layers_with_v1_ids)[-1].v1_id + @property + def legacy_image_ids(self): + return [l.v1_id for l in self.layers_with_v1_ids] + @property def blob_digests(self): return [str(layer.digest) for layer in self.layers] diff --git a/workers/manifestbackfillworker.py b/workers/manifestbackfillworker.py index 8593fccdd..decb9dd44 100644 --- a/workers/manifestbackfillworker.py +++ b/workers/manifestbackfillworker.py @@ -51,6 +51,10 @@ class BrokenManifest(ManifestInterface): def layers(self): return [] + @property + def legacy_image_ids(self): + return [] + @property def leaf_layer_v1_image_id(self): return None