Read blobs from new manifest blob table where relevant
This commit is contained in:
parent
4985040d31
commit
adccdd30ca
9 changed files with 146 additions and 62 deletions
|
@ -1,6 +1,7 @@
|
|||
# There MUST NOT be any circular dependencies between these subsections. If there are fix it by
|
||||
# moving the minimal number of things to shared
|
||||
from data.model.oci import (
|
||||
blob,
|
||||
label,
|
||||
manifest,
|
||||
shared,
|
||||
|
|
26
data/model/oci/blob.py
Normal file
26
data/model/oci/blob.py
Normal file
|
@ -0,0 +1,26 @@
|
|||
from data.database import ImageStorage, ManifestBlob
|
||||
from data.model import BlobDoesNotExist
|
||||
from data.model.storage import get_storage_by_subquery, InvalidImageException
|
||||
from data.model.blob import get_repository_blob_by_digest as legacy_get
|
||||
|
||||
def get_repository_blob_by_digest(repository, blob_digest):
|
||||
""" Find the content-addressable blob linked to the specified repository and
|
||||
returns it or None if none.
|
||||
"""
|
||||
try:
|
||||
storage_id_query = (ImageStorage
|
||||
.select(ImageStorage.id)
|
||||
.join(ManifestBlob)
|
||||
.where(ManifestBlob.repository == repository,
|
||||
ImageStorage.content_checksum == blob_digest,
|
||||
ImageStorage.uploading == False)
|
||||
.limit(1))
|
||||
|
||||
return get_storage_by_subquery(storage_id_query)
|
||||
except InvalidImageException:
|
||||
# TODO(jschorr): Remove once we are no longer using the legacy tables.
|
||||
# Try the legacy call.
|
||||
try:
|
||||
return legacy_get(repository, blob_digest)
|
||||
except BlobDoesNotExist:
|
||||
return None
|
|
@ -270,7 +270,7 @@ def get_layer_path_for_storage(storage_uuid, cas_path, content_checksum):
|
|||
return store.blob_path(content_checksum)
|
||||
|
||||
|
||||
def lookup_repo_storages_by_content_checksum(repo, checksums):
|
||||
def lookup_repo_storages_by_content_checksum(repo, checksums, by_manifest=False):
|
||||
""" Looks up repository storages (without placements) matching the given repository
|
||||
and checksum. """
|
||||
# There may be many duplicates of the checksums, so for performance reasons we are going
|
||||
|
@ -279,14 +279,29 @@ def lookup_repo_storages_by_content_checksum(repo, checksums):
|
|||
|
||||
for counter, checksum in enumerate(set(checksums)):
|
||||
query_alias = 'q{0}'.format(counter)
|
||||
candidate_subq = (ImageStorage
|
||||
.select(ImageStorage.id, ImageStorage.content_checksum,
|
||||
ImageStorage.image_size, ImageStorage.uuid, ImageStorage.cas_path,
|
||||
ImageStorage.uncompressed_size, ImageStorage.uploading)
|
||||
.join(Image)
|
||||
.where(Image.repository == repo, ImageStorage.content_checksum == checksum)
|
||||
.limit(1)
|
||||
.alias(query_alias))
|
||||
|
||||
# TODO(jschorr): Remove once we have a new-style model for tracking temp uploaded blobs and
|
||||
# all legacy tables have been removed.
|
||||
if by_manifest:
|
||||
candidate_subq = (ImageStorage
|
||||
.select(ImageStorage.id, ImageStorage.content_checksum,
|
||||
ImageStorage.image_size, ImageStorage.uuid, ImageStorage.cas_path,
|
||||
ImageStorage.uncompressed_size, ImageStorage.uploading)
|
||||
.join(ManifestBlob)
|
||||
.where(ManifestBlob.repository == repo,
|
||||
ImageStorage.content_checksum == checksum)
|
||||
.limit(1)
|
||||
.alias(query_alias))
|
||||
else:
|
||||
candidate_subq = (ImageStorage
|
||||
.select(ImageStorage.id, ImageStorage.content_checksum,
|
||||
ImageStorage.image_size, ImageStorage.uuid, ImageStorage.cas_path,
|
||||
ImageStorage.uncompressed_size, ImageStorage.uploading)
|
||||
.join(Image)
|
||||
.where(Image.repository == repo, ImageStorage.content_checksum == checksum)
|
||||
.limit(1)
|
||||
.alias(query_alias))
|
||||
|
||||
queries.append(ImageStorage
|
||||
.select(SQL('*'))
|
||||
.from_(candidate_subq))
|
||||
|
|
|
@ -9,7 +9,8 @@ from data.model import oci, DataModelException
|
|||
from data.model.oci.retriever import RepositoryContentRetriever
|
||||
from data.database import db_transaction, Image
|
||||
from data.registry_model.interface import RegistryDataInterface
|
||||
from data.registry_model.datatypes import Tag, Manifest, LegacyImage, Label, SecurityScanStatus
|
||||
from data.registry_model.datatypes import (Tag, Manifest, LegacyImage, Label, SecurityScanStatus,
|
||||
Blob)
|
||||
from data.registry_model.shared import SharedModel
|
||||
from data.registry_model.label_handlers import apply_label_to_manifest
|
||||
from image.docker import ManifestException
|
||||
|
@ -430,7 +431,8 @@ class OCIModel(SharedModel, RegistryDataInterface):
|
|||
logger.exception('Could not parse and validate manifest `%s`', manifest._db_id)
|
||||
return None
|
||||
|
||||
return self._list_manifest_layers(manifest_obj.repository_id, parsed, include_placements)
|
||||
return self._list_manifest_layers(manifest_obj.repository_id, parsed, include_placements,
|
||||
by_manifest=True)
|
||||
|
||||
def lookup_derived_image(self, manifest, verb, varying_metadata=None, include_placements=False):
|
||||
"""
|
||||
|
@ -501,5 +503,33 @@ class OCIModel(SharedModel, RegistryDataInterface):
|
|||
li = LegacyImage.for_image(legacy_image)
|
||||
return Manifest.for_manifest(created_manifest.manifest, li)
|
||||
|
||||
def get_repo_blob_by_digest(self, repository_ref, blob_digest, include_placements=False):
|
||||
"""
|
||||
Returns the blob in the repository with the given digest, if any or None if none. Note that
|
||||
there may be multiple records in the same repository for the same blob digest, so the return
|
||||
value of this function may change.
|
||||
"""
|
||||
image_storage = oci.blob.get_repository_blob_by_digest(repository_ref._db_id, blob_digest)
|
||||
if image_storage is None:
|
||||
return None
|
||||
|
||||
assert image_storage.cas_path is not None
|
||||
|
||||
placements = None
|
||||
if include_placements:
|
||||
placements = list(model.storage.get_storage_locations(image_storage.uuid))
|
||||
|
||||
return Blob.for_image_storage(image_storage,
|
||||
storage_path=model.storage.get_layer_path(image_storage),
|
||||
placements=placements)
|
||||
|
||||
def list_parsed_manifest_layers(self, repository_ref, parsed_manifest, include_placements=False):
|
||||
""" Returns an *ordered list* of the layers found in the parsed manifest, starting at the base
|
||||
and working towards the leaf, including the associated Blob and its placements
|
||||
(if specified).
|
||||
"""
|
||||
return self._list_manifest_layers(repository_ref._db_id, parsed_manifest, include_placements,
|
||||
by_manifest=True)
|
||||
|
||||
|
||||
oci_model = OCIModel()
|
||||
|
|
|
@ -556,5 +556,33 @@ class PreOCIModel(SharedModel, RegistryDataInterface):
|
|||
"""
|
||||
raise NotImplementedError('Unsupported in pre OCI model')
|
||||
|
||||
def get_repo_blob_by_digest(self, repository_ref, blob_digest, include_placements=False):
|
||||
"""
|
||||
Returns the blob in the repository with the given digest, if any or None if none. Note that
|
||||
there may be multiple records in the same repository for the same blob digest, so the return
|
||||
value of this function may change.
|
||||
"""
|
||||
try:
|
||||
image_storage = model.blob.get_repository_blob_by_digest(repository_ref._db_id, blob_digest)
|
||||
except model.BlobDoesNotExist:
|
||||
return None
|
||||
|
||||
assert image_storage.cas_path is not None
|
||||
|
||||
placements = None
|
||||
if include_placements:
|
||||
placements = list(model.storage.get_storage_locations(image_storage.uuid))
|
||||
|
||||
return Blob.for_image_storage(image_storage,
|
||||
storage_path=model.storage.get_layer_path(image_storage),
|
||||
placements=placements)
|
||||
|
||||
def list_parsed_manifest_layers(self, repository_ref, parsed_manifest, include_placements=False):
|
||||
""" Returns an *ordered list* of the layers found in the parsed manifest, starting at the base
|
||||
and working towards the leaf, including the associated Blob and its placements
|
||||
(if specified).
|
||||
"""
|
||||
return self._list_manifest_layers(repository_ref._db_id, parsed_manifest, include_placements)
|
||||
|
||||
|
||||
pre_oci_model = PreOCIModel()
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# pylint: disable=protected-access
|
||||
import logging
|
||||
|
||||
from abc import abstractmethod
|
||||
from collections import defaultdict
|
||||
|
||||
from data import database
|
||||
|
@ -149,26 +150,9 @@ class SharedModel:
|
|||
|
||||
return self.get_repo_blob_by_digest(repository_ref, blob_digest, include_placements=True)
|
||||
|
||||
@abstractmethod
|
||||
def get_repo_blob_by_digest(self, repository_ref, blob_digest, include_placements=False):
|
||||
"""
|
||||
Returns the blob in the repository with the given digest, if any or None if none. Note that
|
||||
there may be multiple records in the same repository for the same blob digest, so the return
|
||||
value of this function may change.
|
||||
"""
|
||||
try:
|
||||
image_storage = model.blob.get_repository_blob_by_digest(repository_ref._db_id, blob_digest)
|
||||
except model.BlobDoesNotExist:
|
||||
return None
|
||||
|
||||
assert image_storage.cas_path is not None
|
||||
|
||||
placements = None
|
||||
if include_placements:
|
||||
placements = list(model.storage.get_storage_locations(image_storage.uuid))
|
||||
|
||||
return Blob.for_image_storage(image_storage,
|
||||
storage_path=model.storage.get_layer_path(image_storage),
|
||||
placements=placements)
|
||||
pass
|
||||
|
||||
def create_blob_upload(self, repository_ref, new_upload_id, location_name, storage_metadata):
|
||||
""" Creates a new blob upload and returns a reference. If the blob upload could not be
|
||||
|
@ -306,14 +290,7 @@ class SharedModel:
|
|||
|
||||
return LegacyImage.for_image(image, images_map=parent_images_map, blob=blob)
|
||||
|
||||
def list_parsed_manifest_layers(self, repository_ref, parsed_manifest, include_placements=False):
|
||||
""" Returns an *ordered list* of the layers found in the parsed manifest, starting at the base
|
||||
and working towards the leaf, including the associated Blob and its placements
|
||||
(if specified).
|
||||
"""
|
||||
return self._list_manifest_layers(repository_ref._db_id, parsed_manifest, include_placements)
|
||||
|
||||
def _list_manifest_layers(self, repo_id, parsed, include_placements=False):
|
||||
def _list_manifest_layers(self, repo_id, parsed, include_placements=False, by_manifest=False):
|
||||
""" Returns an *ordered list* of the layers found in the manifest, starting at the base and
|
||||
working towards the leaf, including the associated Blob and its placements (if specified).
|
||||
Returns None if the manifest could not be parsed and validated.
|
||||
|
@ -321,7 +298,8 @@ class SharedModel:
|
|||
storage_map = {}
|
||||
if parsed.local_blob_digests:
|
||||
blob_query = model.storage.lookup_repo_storages_by_content_checksum(repo_id,
|
||||
parsed.local_blob_digests)
|
||||
parsed.local_blob_digests,
|
||||
by_manifest=by_manifest)
|
||||
storage_map = {blob.content_checksum: blob for blob in blob_query}
|
||||
|
||||
manifest_layers = []
|
||||
|
|
|
@ -672,25 +672,26 @@ def test_commit_blob_upload(registry_model):
|
|||
assert not registry_model.lookup_blob_upload(repository_ref, blob_upload.upload_id)
|
||||
|
||||
|
||||
def test_mount_blob_into_repository(registry_model):
|
||||
repository_ref = registry_model.lookup_repository('devtable', 'simple')
|
||||
latest_tag = registry_model.get_repo_tag(repository_ref, 'latest')
|
||||
manifest = registry_model.get_manifest_for_tag(latest_tag)
|
||||
# TODO(jschorr): Re-enable for OCI model once we have a new table for temporary blobs.
|
||||
def test_mount_blob_into_repository(pre_oci_model):
|
||||
repository_ref = pre_oci_model.lookup_repository('devtable', 'simple')
|
||||
latest_tag = pre_oci_model.get_repo_tag(repository_ref, 'latest')
|
||||
manifest = pre_oci_model.get_manifest_for_tag(latest_tag)
|
||||
|
||||
target_repository_ref = registry_model.lookup_repository('devtable', 'complex')
|
||||
target_repository_ref = pre_oci_model.lookup_repository('devtable', 'complex')
|
||||
|
||||
layers = registry_model.list_manifest_layers(manifest, include_placements=True)
|
||||
layers = pre_oci_model.list_manifest_layers(manifest, include_placements=True)
|
||||
assert layers
|
||||
|
||||
for layer in layers:
|
||||
# Ensure the blob doesn't exist under the repository.
|
||||
assert not registry_model.get_repo_blob_by_digest(target_repository_ref, layer.blob.digest)
|
||||
assert not pre_oci_model.get_repo_blob_by_digest(target_repository_ref, layer.blob.digest)
|
||||
|
||||
# Mount the blob into the repository.
|
||||
assert registry_model.mount_blob_into_repository(layer.blob, target_repository_ref, 60)
|
||||
assert pre_oci_model.mount_blob_into_repository(layer.blob, target_repository_ref, 60)
|
||||
|
||||
# Ensure it now exists.
|
||||
found = registry_model.get_repo_blob_by_digest(target_repository_ref, layer.blob.digest)
|
||||
found = pre_oci_model.get_repo_blob_by_digest(target_repository_ref, layer.blob.digest)
|
||||
assert found == layer.blob
|
||||
|
||||
|
||||
|
@ -725,19 +726,21 @@ def test_get_cached_repo_blob(registry_model):
|
|||
|
||||
with patch('data.registry_model.registry_pre_oci_model.model.blob.get_repository_blob_by_digest',
|
||||
fail):
|
||||
# Make sure we can load again, which should hit the cache.
|
||||
cached = registry_model.get_cached_repo_blob(model_cache, 'devtable', 'simple', blob.digest)
|
||||
assert cached.digest == blob.digest
|
||||
assert cached.uuid == blob.uuid
|
||||
assert cached.compressed_size == blob.compressed_size
|
||||
assert cached.uncompressed_size == blob.uncompressed_size
|
||||
assert cached.uploading == blob.uploading
|
||||
assert cached.placements == blob.placements
|
||||
with patch('data.registry_model.registry_oci_model.model.oci.blob.get_repository_blob_by_digest',
|
||||
fail):
|
||||
# Make sure we can load again, which should hit the cache.
|
||||
cached = registry_model.get_cached_repo_blob(model_cache, 'devtable', 'simple', blob.digest)
|
||||
assert cached.digest == blob.digest
|
||||
assert cached.uuid == blob.uuid
|
||||
assert cached.compressed_size == blob.compressed_size
|
||||
assert cached.uncompressed_size == blob.uncompressed_size
|
||||
assert cached.uploading == blob.uploading
|
||||
assert cached.placements == blob.placements
|
||||
|
||||
# Try another blob, which should fail since the DB is not connected and the cache
|
||||
# does not contain the blob.
|
||||
with pytest.raises(SomeException):
|
||||
registry_model.get_cached_repo_blob(model_cache, 'devtable', 'simple', 'some other digest')
|
||||
# Try another blob, which should fail since the DB is not connected and the cache
|
||||
# does not contain the blob.
|
||||
with pytest.raises(SomeException):
|
||||
registry_model.get_cached_repo_blob(model_cache, 'devtable', 'simple', 'some other digest')
|
||||
|
||||
|
||||
def test_create_manifest_and_retarget_tag(registry_model):
|
||||
|
|
|
@ -6,7 +6,7 @@ from image.docker.schema1 import (DockerSchema1ManifestBuilder,
|
|||
DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE,
|
||||
DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE)
|
||||
from image.docker.schema2.manifest import (MalformedSchema2Manifest, DockerSchema2Manifest,
|
||||
DockerSchema2ManifestBuilder, EMPTY_BLOB_DIGEST)
|
||||
DockerSchema2ManifestBuilder, EMPTY_LAYER_BLOB_DIGEST)
|
||||
from image.docker.schema2.test.test_config import CONFIG_BYTES
|
||||
from image.docker.schemautil import ContentRetrieverForTesting
|
||||
|
||||
|
@ -279,7 +279,7 @@ def test_generate_legacy_layers():
|
|||
legacy_layers = list(manifest.generate_legacy_layers({}, retriever))
|
||||
assert len(legacy_layers) == 3
|
||||
assert legacy_layers[0].content_checksum == 'sha256:abc123'
|
||||
assert legacy_layers[1].content_checksum == EMPTY_BLOB_DIGEST
|
||||
assert legacy_layers[1].content_checksum == EMPTY_LAYER_BLOB_DIGEST
|
||||
assert legacy_layers[2].content_checksum == 'sha256:def456'
|
||||
|
||||
assert legacy_layers[0].created == "2018-04-03T18:37:09.284840891Z"
|
||||
|
|
|
@ -96,6 +96,9 @@ class BrokenManifest(ManifestInterface):
|
|||
def has_legacy_image(self):
|
||||
return False
|
||||
|
||||
def get_requires_empty_layer_blob(self, content_retriever):
|
||||
return False
|
||||
|
||||
|
||||
class ManifestBackfillWorker(Worker):
|
||||
def __init__(self):
|
||||
|
|
Reference in a new issue