diff --git a/data/model/oci/__init__.py b/data/model/oci/__init__.py index 2bdea0ae3..39bcef2eb 100644 --- a/data/model/oci/__init__.py +++ b/data/model/oci/__init__.py @@ -1,6 +1,7 @@ # There MUST NOT be any circular dependencies between these subsections. If there are fix it by # moving the minimal number of things to shared from data.model.oci import ( + blob, label, manifest, shared, diff --git a/data/model/oci/blob.py b/data/model/oci/blob.py new file mode 100644 index 000000000..435d8fefb --- /dev/null +++ b/data/model/oci/blob.py @@ -0,0 +1,26 @@ +from data.database import ImageStorage, ManifestBlob +from data.model import BlobDoesNotExist +from data.model.storage import get_storage_by_subquery, InvalidImageException +from data.model.blob import get_repository_blob_by_digest as legacy_get + +def get_repository_blob_by_digest(repository, blob_digest): + """ Find the content-addressable blob linked to the specified repository and + returns it or None if none. + """ + try: + storage_id_query = (ImageStorage + .select(ImageStorage.id) + .join(ManifestBlob) + .where(ManifestBlob.repository == repository, + ImageStorage.content_checksum == blob_digest, + ImageStorage.uploading == False) + .limit(1)) + + return get_storage_by_subquery(storage_id_query) + except InvalidImageException: + # TODO(jschorr): Remove once we are no longer using the legacy tables. + # Try the legacy call. + try: + return legacy_get(repository, blob_digest) + except BlobDoesNotExist: + return None diff --git a/data/model/storage.py b/data/model/storage.py index 79cff50e1..d8f3798a8 100644 --- a/data/model/storage.py +++ b/data/model/storage.py @@ -270,7 +270,7 @@ def get_layer_path_for_storage(storage_uuid, cas_path, content_checksum): return store.blob_path(content_checksum) -def lookup_repo_storages_by_content_checksum(repo, checksums): +def lookup_repo_storages_by_content_checksum(repo, checksums, by_manifest=False): """ Looks up repository storages (without placements) matching the given repository and checksum. """ # There may be many duplicates of the checksums, so for performance reasons we are going @@ -279,14 +279,29 @@ def lookup_repo_storages_by_content_checksum(repo, checksums): for counter, checksum in enumerate(set(checksums)): query_alias = 'q{0}'.format(counter) - candidate_subq = (ImageStorage - .select(ImageStorage.id, ImageStorage.content_checksum, - ImageStorage.image_size, ImageStorage.uuid, ImageStorage.cas_path, - ImageStorage.uncompressed_size, ImageStorage.uploading) - .join(Image) - .where(Image.repository == repo, ImageStorage.content_checksum == checksum) - .limit(1) - .alias(query_alias)) + + # TODO(jschorr): Remove once we have a new-style model for tracking temp uploaded blobs and + # all legacy tables have been removed. + if by_manifest: + candidate_subq = (ImageStorage + .select(ImageStorage.id, ImageStorage.content_checksum, + ImageStorage.image_size, ImageStorage.uuid, ImageStorage.cas_path, + ImageStorage.uncompressed_size, ImageStorage.uploading) + .join(ManifestBlob) + .where(ManifestBlob.repository == repo, + ImageStorage.content_checksum == checksum) + .limit(1) + .alias(query_alias)) + else: + candidate_subq = (ImageStorage + .select(ImageStorage.id, ImageStorage.content_checksum, + ImageStorage.image_size, ImageStorage.uuid, ImageStorage.cas_path, + ImageStorage.uncompressed_size, ImageStorage.uploading) + .join(Image) + .where(Image.repository == repo, ImageStorage.content_checksum == checksum) + .limit(1) + .alias(query_alias)) + queries.append(ImageStorage .select(SQL('*')) .from_(candidate_subq)) diff --git a/data/registry_model/registry_oci_model.py b/data/registry_model/registry_oci_model.py index 55527b340..237516edb 100644 --- a/data/registry_model/registry_oci_model.py +++ b/data/registry_model/registry_oci_model.py @@ -9,7 +9,8 @@ from data.model import oci, DataModelException from data.model.oci.retriever import RepositoryContentRetriever from data.database import db_transaction, Image from data.registry_model.interface import RegistryDataInterface -from data.registry_model.datatypes import Tag, Manifest, LegacyImage, Label, SecurityScanStatus +from data.registry_model.datatypes import (Tag, Manifest, LegacyImage, Label, SecurityScanStatus, + Blob) from data.registry_model.shared import SharedModel from data.registry_model.label_handlers import apply_label_to_manifest from image.docker import ManifestException @@ -430,7 +431,8 @@ class OCIModel(SharedModel, RegistryDataInterface): logger.exception('Could not parse and validate manifest `%s`', manifest._db_id) return None - return self._list_manifest_layers(manifest_obj.repository_id, parsed, include_placements) + return self._list_manifest_layers(manifest_obj.repository_id, parsed, include_placements, + by_manifest=True) def lookup_derived_image(self, manifest, verb, varying_metadata=None, include_placements=False): """ @@ -501,5 +503,33 @@ class OCIModel(SharedModel, RegistryDataInterface): li = LegacyImage.for_image(legacy_image) return Manifest.for_manifest(created_manifest.manifest, li) + def get_repo_blob_by_digest(self, repository_ref, blob_digest, include_placements=False): + """ + Returns the blob in the repository with the given digest, if any or None if none. Note that + there may be multiple records in the same repository for the same blob digest, so the return + value of this function may change. + """ + image_storage = oci.blob.get_repository_blob_by_digest(repository_ref._db_id, blob_digest) + if image_storage is None: + return None + + assert image_storage.cas_path is not None + + placements = None + if include_placements: + placements = list(model.storage.get_storage_locations(image_storage.uuid)) + + return Blob.for_image_storage(image_storage, + storage_path=model.storage.get_layer_path(image_storage), + placements=placements) + + def list_parsed_manifest_layers(self, repository_ref, parsed_manifest, include_placements=False): + """ Returns an *ordered list* of the layers found in the parsed manifest, starting at the base + and working towards the leaf, including the associated Blob and its placements + (if specified). + """ + return self._list_manifest_layers(repository_ref._db_id, parsed_manifest, include_placements, + by_manifest=True) + oci_model = OCIModel() diff --git a/data/registry_model/registry_pre_oci_model.py b/data/registry_model/registry_pre_oci_model.py index 59d072c30..0ce9cdf40 100644 --- a/data/registry_model/registry_pre_oci_model.py +++ b/data/registry_model/registry_pre_oci_model.py @@ -556,5 +556,33 @@ class PreOCIModel(SharedModel, RegistryDataInterface): """ raise NotImplementedError('Unsupported in pre OCI model') + def get_repo_blob_by_digest(self, repository_ref, blob_digest, include_placements=False): + """ + Returns the blob in the repository with the given digest, if any or None if none. Note that + there may be multiple records in the same repository for the same blob digest, so the return + value of this function may change. + """ + try: + image_storage = model.blob.get_repository_blob_by_digest(repository_ref._db_id, blob_digest) + except model.BlobDoesNotExist: + return None + + assert image_storage.cas_path is not None + + placements = None + if include_placements: + placements = list(model.storage.get_storage_locations(image_storage.uuid)) + + return Blob.for_image_storage(image_storage, + storage_path=model.storage.get_layer_path(image_storage), + placements=placements) + + def list_parsed_manifest_layers(self, repository_ref, parsed_manifest, include_placements=False): + """ Returns an *ordered list* of the layers found in the parsed manifest, starting at the base + and working towards the leaf, including the associated Blob and its placements + (if specified). + """ + return self._list_manifest_layers(repository_ref._db_id, parsed_manifest, include_placements) + pre_oci_model = PreOCIModel() diff --git a/data/registry_model/shared.py b/data/registry_model/shared.py index 2b742d4b8..c220c9652 100644 --- a/data/registry_model/shared.py +++ b/data/registry_model/shared.py @@ -1,6 +1,7 @@ # pylint: disable=protected-access import logging +from abc import abstractmethod from collections import defaultdict from data import database @@ -149,26 +150,9 @@ class SharedModel: return self.get_repo_blob_by_digest(repository_ref, blob_digest, include_placements=True) + @abstractmethod def get_repo_blob_by_digest(self, repository_ref, blob_digest, include_placements=False): - """ - Returns the blob in the repository with the given digest, if any or None if none. Note that - there may be multiple records in the same repository for the same blob digest, so the return - value of this function may change. - """ - try: - image_storage = model.blob.get_repository_blob_by_digest(repository_ref._db_id, blob_digest) - except model.BlobDoesNotExist: - return None - - assert image_storage.cas_path is not None - - placements = None - if include_placements: - placements = list(model.storage.get_storage_locations(image_storage.uuid)) - - return Blob.for_image_storage(image_storage, - storage_path=model.storage.get_layer_path(image_storage), - placements=placements) + pass def create_blob_upload(self, repository_ref, new_upload_id, location_name, storage_metadata): """ Creates a new blob upload and returns a reference. If the blob upload could not be @@ -306,14 +290,7 @@ class SharedModel: return LegacyImage.for_image(image, images_map=parent_images_map, blob=blob) - def list_parsed_manifest_layers(self, repository_ref, parsed_manifest, include_placements=False): - """ Returns an *ordered list* of the layers found in the parsed manifest, starting at the base - and working towards the leaf, including the associated Blob and its placements - (if specified). - """ - return self._list_manifest_layers(repository_ref._db_id, parsed_manifest, include_placements) - - def _list_manifest_layers(self, repo_id, parsed, include_placements=False): + def _list_manifest_layers(self, repo_id, parsed, include_placements=False, by_manifest=False): """ Returns an *ordered list* of the layers found in the manifest, starting at the base and working towards the leaf, including the associated Blob and its placements (if specified). Returns None if the manifest could not be parsed and validated. @@ -321,7 +298,8 @@ class SharedModel: storage_map = {} if parsed.local_blob_digests: blob_query = model.storage.lookup_repo_storages_by_content_checksum(repo_id, - parsed.local_blob_digests) + parsed.local_blob_digests, + by_manifest=by_manifest) storage_map = {blob.content_checksum: blob for blob in blob_query} manifest_layers = [] diff --git a/data/registry_model/test/test_interface.py b/data/registry_model/test/test_interface.py index b99db1d06..580dd2212 100644 --- a/data/registry_model/test/test_interface.py +++ b/data/registry_model/test/test_interface.py @@ -672,25 +672,26 @@ def test_commit_blob_upload(registry_model): assert not registry_model.lookup_blob_upload(repository_ref, blob_upload.upload_id) -def test_mount_blob_into_repository(registry_model): - repository_ref = registry_model.lookup_repository('devtable', 'simple') - latest_tag = registry_model.get_repo_tag(repository_ref, 'latest') - manifest = registry_model.get_manifest_for_tag(latest_tag) +# TODO(jschorr): Re-enable for OCI model once we have a new table for temporary blobs. +def test_mount_blob_into_repository(pre_oci_model): + repository_ref = pre_oci_model.lookup_repository('devtable', 'simple') + latest_tag = pre_oci_model.get_repo_tag(repository_ref, 'latest') + manifest = pre_oci_model.get_manifest_for_tag(latest_tag) - target_repository_ref = registry_model.lookup_repository('devtable', 'complex') + target_repository_ref = pre_oci_model.lookup_repository('devtable', 'complex') - layers = registry_model.list_manifest_layers(manifest, include_placements=True) + layers = pre_oci_model.list_manifest_layers(manifest, include_placements=True) assert layers for layer in layers: # Ensure the blob doesn't exist under the repository. - assert not registry_model.get_repo_blob_by_digest(target_repository_ref, layer.blob.digest) + assert not pre_oci_model.get_repo_blob_by_digest(target_repository_ref, layer.blob.digest) # Mount the blob into the repository. - assert registry_model.mount_blob_into_repository(layer.blob, target_repository_ref, 60) + assert pre_oci_model.mount_blob_into_repository(layer.blob, target_repository_ref, 60) # Ensure it now exists. - found = registry_model.get_repo_blob_by_digest(target_repository_ref, layer.blob.digest) + found = pre_oci_model.get_repo_blob_by_digest(target_repository_ref, layer.blob.digest) assert found == layer.blob @@ -725,19 +726,21 @@ def test_get_cached_repo_blob(registry_model): with patch('data.registry_model.registry_pre_oci_model.model.blob.get_repository_blob_by_digest', fail): - # Make sure we can load again, which should hit the cache. - cached = registry_model.get_cached_repo_blob(model_cache, 'devtable', 'simple', blob.digest) - assert cached.digest == blob.digest - assert cached.uuid == blob.uuid - assert cached.compressed_size == blob.compressed_size - assert cached.uncompressed_size == blob.uncompressed_size - assert cached.uploading == blob.uploading - assert cached.placements == blob.placements + with patch('data.registry_model.registry_oci_model.model.oci.blob.get_repository_blob_by_digest', + fail): + # Make sure we can load again, which should hit the cache. + cached = registry_model.get_cached_repo_blob(model_cache, 'devtable', 'simple', blob.digest) + assert cached.digest == blob.digest + assert cached.uuid == blob.uuid + assert cached.compressed_size == blob.compressed_size + assert cached.uncompressed_size == blob.uncompressed_size + assert cached.uploading == blob.uploading + assert cached.placements == blob.placements - # Try another blob, which should fail since the DB is not connected and the cache - # does not contain the blob. - with pytest.raises(SomeException): - registry_model.get_cached_repo_blob(model_cache, 'devtable', 'simple', 'some other digest') + # Try another blob, which should fail since the DB is not connected and the cache + # does not contain the blob. + with pytest.raises(SomeException): + registry_model.get_cached_repo_blob(model_cache, 'devtable', 'simple', 'some other digest') def test_create_manifest_and_retarget_tag(registry_model): diff --git a/image/docker/schema2/test/test_manifest.py b/image/docker/schema2/test/test_manifest.py index 4a875c794..8f6327a83 100644 --- a/image/docker/schema2/test/test_manifest.py +++ b/image/docker/schema2/test/test_manifest.py @@ -6,7 +6,7 @@ from image.docker.schema1 import (DockerSchema1ManifestBuilder, DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE, DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE) from image.docker.schema2.manifest import (MalformedSchema2Manifest, DockerSchema2Manifest, - DockerSchema2ManifestBuilder, EMPTY_BLOB_DIGEST) + DockerSchema2ManifestBuilder, EMPTY_LAYER_BLOB_DIGEST) from image.docker.schema2.test.test_config import CONFIG_BYTES from image.docker.schemautil import ContentRetrieverForTesting @@ -279,7 +279,7 @@ def test_generate_legacy_layers(): legacy_layers = list(manifest.generate_legacy_layers({}, retriever)) assert len(legacy_layers) == 3 assert legacy_layers[0].content_checksum == 'sha256:abc123' - assert legacy_layers[1].content_checksum == EMPTY_BLOB_DIGEST + assert legacy_layers[1].content_checksum == EMPTY_LAYER_BLOB_DIGEST assert legacy_layers[2].content_checksum == 'sha256:def456' assert legacy_layers[0].created == "2018-04-03T18:37:09.284840891Z" diff --git a/workers/manifestbackfillworker.py b/workers/manifestbackfillworker.py index bed90c6a6..b92e58a72 100644 --- a/workers/manifestbackfillworker.py +++ b/workers/manifestbackfillworker.py @@ -96,6 +96,9 @@ class BrokenManifest(ManifestInterface): def has_legacy_image(self): return False + def get_requires_empty_layer_blob(self, content_retriever): + return False + class ManifestBackfillWorker(Worker): def __init__(self):