From 82ee21bfbd708484e9d941ecf17b55b863c5ec07 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Mon, 19 Nov 2018 12:24:04 +0200 Subject: [PATCH] Change OCI data model to use a content retriever for the various manifest operations --- data/model/oci/manifest.py | 50 ++++++------------------ data/model/oci/retriever.py | 37 ++++++++++++++++++ data/model/oci/test/test_oci_manifest.py | 14 +++++-- image/docker/schema2/list.py | 4 +- image/docker/schema2/test/test_list.py | 2 +- 5 files changed, 61 insertions(+), 46 deletions(-) create mode 100644 data/model/oci/retriever.py diff --git a/data/model/oci/manifest.py b/data/model/oci/manifest.py index c63437e9f..e1023881b 100644 --- a/data/model/oci/manifest.py +++ b/data/model/oci/manifest.py @@ -9,9 +9,8 @@ from data.database import (Tag, Manifest, ManifestBlob, ManifestLegacyImage, Man from data.model import BlobDoesNotExist from data.model.oci.tag import filter_to_alive_tags from data.model.oci.label import create_manifest_label -from data.model.storage import (lookup_repo_storages_by_content_checksum, get_storage_locations, - get_layer_path) -from data.model.blob import get_repository_blob_by_digest +from data.model.oci.retriever import RepositoryContentRetriever +from data.model.storage import lookup_repo_storages_by_content_checksum from data.model.image import lookup_repository_images, get_image, synthesize_v1_image from image.docker.schema1 import ManifestException from image.docker.schema2.list import MalformedSchema2ManifestList @@ -57,13 +56,9 @@ def get_or_create_manifest(repository_id, manifest_interface_instance, storage): def _create_manifest(repository_id, manifest_interface_instance, storage): - digests = set(manifest_interface_instance.local_blob_digests) - - def _lookup_digest(digest): - return _retrieve_bytes_in_storage(repository_id, digest, storage) - # Load, parse and get/create the child manifests, if any. - child_manifest_refs = manifest_interface_instance.child_manifests(_lookup_digest) + retriever = RepositoryContentRetriever.for_repository(repository_id, storage) + child_manifest_refs = manifest_interface_instance.child_manifests(retriever) child_manifest_rows = [] child_manifest_label_dicts = [] @@ -72,25 +67,13 @@ def _create_manifest(repository_id, manifest_interface_instance, storage): # Load and parse the child manifest. try: child_manifest = child_manifest_ref.manifest_obj - except ManifestException: - logger.exception('Could not load manifest list for manifest `%s`', - manifest_interface_instance.digest) - return None - except MalformedSchema2ManifestList: - logger.exception('Could not load manifest list for manifest `%s`', - manifest_interface_instance.digest) - return None - except BlobDoesNotExist: - logger.exception('Could not load manifest list for manifest `%s`', - manifest_interface_instance.digest) - return None - except IOError: + except (ManifestException, MalformedSchema2ManifestList, BlobDoesNotExist, IOError): logger.exception('Could not load manifest list for manifest `%s`', manifest_interface_instance.digest) return None # Retrieve its labels. - labels = child_manifest.get_manifest_labels(_lookup_digest) + labels = child_manifest.get_manifest_labels(retriever) if labels is None: logger.exception('Could not load manifest labels for child manifest') return None @@ -111,6 +94,7 @@ def _create_manifest(repository_id, manifest_interface_instance, storage): child_manifest_label_dicts.append(labels) # Ensure all the blobs in the manifest exist. + digests = set(manifest_interface_instance.local_blob_digests) blob_map = {} if digests: query = lookup_repo_storages_by_content_checksum(repository_id, digests) @@ -126,7 +110,7 @@ def _create_manifest(repository_id, manifest_interface_instance, storage): legacy_image = None if manifest_interface_instance.leaf_layer_v1_image_id is not None: legacy_image_id = _populate_legacy_image(repository_id, manifest_interface_instance, blob_map, - storage) + retriever) if legacy_image_id is None: return None @@ -167,7 +151,7 @@ def _create_manifest(repository_id, manifest_interface_instance, storage): ManifestChild.insert_many(children_to_insert).execute() # Define the labels for the manifest (if any). - labels = manifest_interface_instance.get_manifest_labels(_lookup_digest) + labels = manifest_interface_instance.get_manifest_labels(retriever) if labels: for key, value in labels.iteritems(): media_type = 'application/json' if is_json(value) else 'text/plain' @@ -190,10 +174,7 @@ def _create_manifest(repository_id, manifest_interface_instance, storage): return CreatedManifest(manifest=manifest, newly_created=True, labels_to_apply=labels_to_apply) -def _populate_legacy_image(repository_id, manifest_interface_instance, blob_map, storage): - def _lookup_digest(digest): - return _retrieve_bytes_in_storage(repository_id, digest, storage) - +def _populate_legacy_image(repository_id, manifest_interface_instance, blob_map, retriever): # Lookup all the images and their parent images (if any) inside the manifest. # This will let us know which v1 images we need to synthesize and which ones are invalid. docker_image_ids = list(manifest_interface_instance.legacy_image_ids) @@ -203,7 +184,7 @@ def _populate_legacy_image(repository_id, manifest_interface_instance, blob_map, # Rewrite any v1 image IDs that do not match the checksum in the database. try: rewritten_images = manifest_interface_instance.generate_legacy_layers(image_storage_map, - _lookup_digest) + retriever) rewritten_images = list(rewritten_images) parent_image_map = {} @@ -235,12 +216,3 @@ def _populate_legacy_image(repository_id, manifest_interface_instance, blob_map, return None return rewritten_images[-1].image_id - - -def _retrieve_bytes_in_storage(repository_id, digest, storage): - blob = get_repository_blob_by_digest(repository_id, digest) - if blob is None: - return None - - placements = list(get_storage_locations(blob.uuid)) - return storage.get_content(placements, get_layer_path(blob)) diff --git a/data/model/oci/retriever.py b/data/model/oci/retriever.py new file mode 100644 index 000000000..78a07b6ad --- /dev/null +++ b/data/model/oci/retriever.py @@ -0,0 +1,37 @@ +from image.docker.interfaces import ContentRetriever +from data.database import Manifest +from data.model.blob import get_repository_blob_by_digest +from data.model.storage import get_storage_locations, get_layer_path + +class RepositoryContentRetriever(ContentRetriever): + """ Implementation of the ContentRetriever interface for manifests that retrieves + config blobs and child manifests for the specified repository. + """ + def __init__(self, repository_id, storage): + self.repository_id = repository_id + self.storage = storage + + @classmethod + def for_repository(cls, repository_id, storage): + return RepositoryContentRetriever(repository_id, storage) + + def get_manifest_bytes_with_digest(self, digest): + """ Returns the bytes of the manifest with the given digest or None if none found. """ + query = (Manifest + .select() + .where(Manifest.repository == self.repository_id) + .where(Manifest.digest == digest)) + + try: + return query.get().manifest_bytes + except Manifest.DoesNotExist: + return None + + def get_blob_bytes_with_digest(self, digest): + """ Returns the bytes of the blob with the given digest or None if none found. """ + blob = get_repository_blob_by_digest(self.repository_id, digest) + if blob is None: + return None + + placements = list(get_storage_locations(blob.uuid)) + return self.storage.get_content(placements, get_layer_path(blob)) diff --git a/data/model/oci/test/test_oci_manifest.py b/data/model/oci/test/test_oci_manifest.py index def62400d..60f9f1c8e 100644 --- a/data/model/oci/test/test_oci_manifest.py +++ b/data/model/oci/test/test_oci_manifest.py @@ -50,7 +50,8 @@ def test_lookup_manifest_dead_tag(initialized_db): def _populate_blob(content): digest = str(sha256_digest(content)) location = ImageStorageLocation.get(name='local_us') - blob = store_blob_record_and_temp_link('devtable', 'newrepo', digest, location, len(content), 120) + blob = store_blob_record_and_temp_link('devtable', 'newrepo', digest, location, + len(content), 120) storage.put_content(['local_us'], get_layer_path(blob), content) return blob, digest @@ -205,9 +206,14 @@ def test_get_or_create_manifest_list(initialized_db): v2_builder.add_layer(random_digest, len(random_data)) v2_manifest = v2_builder.build() - # Write the manifests as blobs. - _populate_blob(v1_manifest.bytes) - _populate_blob(v2_manifest.bytes) + # Write the manifests. + v1_created = get_or_create_manifest(repository, v1_manifest, storage) + assert v1_created + assert v1_created.manifest.digest == v1_manifest.digest + + v2_created = get_or_create_manifest(repository, v2_manifest, storage) + assert v2_created + assert v2_created.manifest.digest == v2_manifest.digest # Build the manifest list. list_builder = DockerSchema2ManifestListBuilder() diff --git a/image/docker/schema2/list.py b/image/docker/schema2/list.py index 6658f74e9..865f0662f 100644 --- a/image/docker/schema2/list.py +++ b/image/docker/schema2/list.py @@ -225,8 +225,8 @@ class DockerSchema2ManifestList(ManifestInterface): @property def blob_digests(self): - manifests = self._parsed[DOCKER_SCHEMA2_MANIFESTLIST_MANIFESTS_KEY] - return [m[DOCKER_SCHEMA2_MANIFESTLIST_DIGEST_KEY] for m in manifests] + # Manifest lists have no blob digests, since everything is stored as a manifest. + return [] @property def local_blob_digests(self): diff --git a/image/docker/schema2/test/test_list.py b/image/docker/schema2/test/test_list.py index c4bfc16ac..8fbdfbe98 100644 --- a/image/docker/schema2/test/test_list.py +++ b/image/docker/schema2/test/test_list.py @@ -79,7 +79,7 @@ def test_valid_manifestlist(): assert manifestlist.media_type == 'application/vnd.docker.distribution.manifest.list.v2+json' assert manifestlist.bytes == MANIFESTLIST_BYTES assert manifestlist.manifest_dict == json.loads(MANIFESTLIST_BYTES) - assert set(manifestlist.blob_digests) == {'sha256:e6', 'sha256:5b'} + assert not manifestlist.blob_digests for index, manifest in enumerate(manifestlist.manifests(retriever)): if index == 0: