Read blobs from new manifest blob table where relevant

This commit is contained in:
Joseph Schorr 2018-11-25 17:31:09 +02:00
parent 4985040d31
commit adccdd30ca
9 changed files with 146 additions and 62 deletions

View file

@ -1,6 +1,7 @@
# There MUST NOT be any circular dependencies between these subsections. If there are fix it by
# moving the minimal number of things to shared
from data.model.oci import (
blob,
label,
manifest,
shared,

26
data/model/oci/blob.py Normal file
View file

@ -0,0 +1,26 @@
from data.database import ImageStorage, ManifestBlob
from data.model import BlobDoesNotExist
from data.model.storage import get_storage_by_subquery, InvalidImageException
from data.model.blob import get_repository_blob_by_digest as legacy_get
def get_repository_blob_by_digest(repository, blob_digest):
""" Find the content-addressable blob linked to the specified repository and
returns it or None if none.
"""
try:
storage_id_query = (ImageStorage
.select(ImageStorage.id)
.join(ManifestBlob)
.where(ManifestBlob.repository == repository,
ImageStorage.content_checksum == blob_digest,
ImageStorage.uploading == False)
.limit(1))
return get_storage_by_subquery(storage_id_query)
except InvalidImageException:
# TODO(jschorr): Remove once we are no longer using the legacy tables.
# Try the legacy call.
try:
return legacy_get(repository, blob_digest)
except BlobDoesNotExist:
return None

View file

@ -270,7 +270,7 @@ def get_layer_path_for_storage(storage_uuid, cas_path, content_checksum):
return store.blob_path(content_checksum)
def lookup_repo_storages_by_content_checksum(repo, checksums):
def lookup_repo_storages_by_content_checksum(repo, checksums, by_manifest=False):
""" Looks up repository storages (without placements) matching the given repository
and checksum. """
# There may be many duplicates of the checksums, so for performance reasons we are going
@ -279,14 +279,29 @@ def lookup_repo_storages_by_content_checksum(repo, checksums):
for counter, checksum in enumerate(set(checksums)):
query_alias = 'q{0}'.format(counter)
candidate_subq = (ImageStorage
.select(ImageStorage.id, ImageStorage.content_checksum,
ImageStorage.image_size, ImageStorage.uuid, ImageStorage.cas_path,
ImageStorage.uncompressed_size, ImageStorage.uploading)
.join(Image)
.where(Image.repository == repo, ImageStorage.content_checksum == checksum)
.limit(1)
.alias(query_alias))
# TODO(jschorr): Remove once we have a new-style model for tracking temp uploaded blobs and
# all legacy tables have been removed.
if by_manifest:
candidate_subq = (ImageStorage
.select(ImageStorage.id, ImageStorage.content_checksum,
ImageStorage.image_size, ImageStorage.uuid, ImageStorage.cas_path,
ImageStorage.uncompressed_size, ImageStorage.uploading)
.join(ManifestBlob)
.where(ManifestBlob.repository == repo,
ImageStorage.content_checksum == checksum)
.limit(1)
.alias(query_alias))
else:
candidate_subq = (ImageStorage
.select(ImageStorage.id, ImageStorage.content_checksum,
ImageStorage.image_size, ImageStorage.uuid, ImageStorage.cas_path,
ImageStorage.uncompressed_size, ImageStorage.uploading)
.join(Image)
.where(Image.repository == repo, ImageStorage.content_checksum == checksum)
.limit(1)
.alias(query_alias))
queries.append(ImageStorage
.select(SQL('*'))
.from_(candidate_subq))

View file

@ -9,7 +9,8 @@ from data.model import oci, DataModelException
from data.model.oci.retriever import RepositoryContentRetriever
from data.database import db_transaction, Image
from data.registry_model.interface import RegistryDataInterface
from data.registry_model.datatypes import Tag, Manifest, LegacyImage, Label, SecurityScanStatus
from data.registry_model.datatypes import (Tag, Manifest, LegacyImage, Label, SecurityScanStatus,
Blob)
from data.registry_model.shared import SharedModel
from data.registry_model.label_handlers import apply_label_to_manifest
from image.docker import ManifestException
@ -430,7 +431,8 @@ class OCIModel(SharedModel, RegistryDataInterface):
logger.exception('Could not parse and validate manifest `%s`', manifest._db_id)
return None
return self._list_manifest_layers(manifest_obj.repository_id, parsed, include_placements)
return self._list_manifest_layers(manifest_obj.repository_id, parsed, include_placements,
by_manifest=True)
def lookup_derived_image(self, manifest, verb, varying_metadata=None, include_placements=False):
"""
@ -501,5 +503,33 @@ class OCIModel(SharedModel, RegistryDataInterface):
li = LegacyImage.for_image(legacy_image)
return Manifest.for_manifest(created_manifest.manifest, li)
def get_repo_blob_by_digest(self, repository_ref, blob_digest, include_placements=False):
"""
Returns the blob in the repository with the given digest, if any or None if none. Note that
there may be multiple records in the same repository for the same blob digest, so the return
value of this function may change.
"""
image_storage = oci.blob.get_repository_blob_by_digest(repository_ref._db_id, blob_digest)
if image_storage is None:
return None
assert image_storage.cas_path is not None
placements = None
if include_placements:
placements = list(model.storage.get_storage_locations(image_storage.uuid))
return Blob.for_image_storage(image_storage,
storage_path=model.storage.get_layer_path(image_storage),
placements=placements)
def list_parsed_manifest_layers(self, repository_ref, parsed_manifest, include_placements=False):
""" Returns an *ordered list* of the layers found in the parsed manifest, starting at the base
and working towards the leaf, including the associated Blob and its placements
(if specified).
"""
return self._list_manifest_layers(repository_ref._db_id, parsed_manifest, include_placements,
by_manifest=True)
oci_model = OCIModel()

View file

@ -556,5 +556,33 @@ class PreOCIModel(SharedModel, RegistryDataInterface):
"""
raise NotImplementedError('Unsupported in pre OCI model')
def get_repo_blob_by_digest(self, repository_ref, blob_digest, include_placements=False):
"""
Returns the blob in the repository with the given digest, if any or None if none. Note that
there may be multiple records in the same repository for the same blob digest, so the return
value of this function may change.
"""
try:
image_storage = model.blob.get_repository_blob_by_digest(repository_ref._db_id, blob_digest)
except model.BlobDoesNotExist:
return None
assert image_storage.cas_path is not None
placements = None
if include_placements:
placements = list(model.storage.get_storage_locations(image_storage.uuid))
return Blob.for_image_storage(image_storage,
storage_path=model.storage.get_layer_path(image_storage),
placements=placements)
def list_parsed_manifest_layers(self, repository_ref, parsed_manifest, include_placements=False):
""" Returns an *ordered list* of the layers found in the parsed manifest, starting at the base
and working towards the leaf, including the associated Blob and its placements
(if specified).
"""
return self._list_manifest_layers(repository_ref._db_id, parsed_manifest, include_placements)
pre_oci_model = PreOCIModel()

View file

@ -1,6 +1,7 @@
# pylint: disable=protected-access
import logging
from abc import abstractmethod
from collections import defaultdict
from data import database
@ -149,26 +150,9 @@ class SharedModel:
return self.get_repo_blob_by_digest(repository_ref, blob_digest, include_placements=True)
@abstractmethod
def get_repo_blob_by_digest(self, repository_ref, blob_digest, include_placements=False):
"""
Returns the blob in the repository with the given digest, if any or None if none. Note that
there may be multiple records in the same repository for the same blob digest, so the return
value of this function may change.
"""
try:
image_storage = model.blob.get_repository_blob_by_digest(repository_ref._db_id, blob_digest)
except model.BlobDoesNotExist:
return None
assert image_storage.cas_path is not None
placements = None
if include_placements:
placements = list(model.storage.get_storage_locations(image_storage.uuid))
return Blob.for_image_storage(image_storage,
storage_path=model.storage.get_layer_path(image_storage),
placements=placements)
pass
def create_blob_upload(self, repository_ref, new_upload_id, location_name, storage_metadata):
""" Creates a new blob upload and returns a reference. If the blob upload could not be
@ -306,14 +290,7 @@ class SharedModel:
return LegacyImage.for_image(image, images_map=parent_images_map, blob=blob)
def list_parsed_manifest_layers(self, repository_ref, parsed_manifest, include_placements=False):
""" Returns an *ordered list* of the layers found in the parsed manifest, starting at the base
and working towards the leaf, including the associated Blob and its placements
(if specified).
"""
return self._list_manifest_layers(repository_ref._db_id, parsed_manifest, include_placements)
def _list_manifest_layers(self, repo_id, parsed, include_placements=False):
def _list_manifest_layers(self, repo_id, parsed, include_placements=False, by_manifest=False):
""" Returns an *ordered list* of the layers found in the manifest, starting at the base and
working towards the leaf, including the associated Blob and its placements (if specified).
Returns None if the manifest could not be parsed and validated.
@ -321,7 +298,8 @@ class SharedModel:
storage_map = {}
if parsed.local_blob_digests:
blob_query = model.storage.lookup_repo_storages_by_content_checksum(repo_id,
parsed.local_blob_digests)
parsed.local_blob_digests,
by_manifest=by_manifest)
storage_map = {blob.content_checksum: blob for blob in blob_query}
manifest_layers = []

View file

@ -672,25 +672,26 @@ def test_commit_blob_upload(registry_model):
assert not registry_model.lookup_blob_upload(repository_ref, blob_upload.upload_id)
def test_mount_blob_into_repository(registry_model):
repository_ref = registry_model.lookup_repository('devtable', 'simple')
latest_tag = registry_model.get_repo_tag(repository_ref, 'latest')
manifest = registry_model.get_manifest_for_tag(latest_tag)
# TODO(jschorr): Re-enable for OCI model once we have a new table for temporary blobs.
def test_mount_blob_into_repository(pre_oci_model):
repository_ref = pre_oci_model.lookup_repository('devtable', 'simple')
latest_tag = pre_oci_model.get_repo_tag(repository_ref, 'latest')
manifest = pre_oci_model.get_manifest_for_tag(latest_tag)
target_repository_ref = registry_model.lookup_repository('devtable', 'complex')
target_repository_ref = pre_oci_model.lookup_repository('devtable', 'complex')
layers = registry_model.list_manifest_layers(manifest, include_placements=True)
layers = pre_oci_model.list_manifest_layers(manifest, include_placements=True)
assert layers
for layer in layers:
# Ensure the blob doesn't exist under the repository.
assert not registry_model.get_repo_blob_by_digest(target_repository_ref, layer.blob.digest)
assert not pre_oci_model.get_repo_blob_by_digest(target_repository_ref, layer.blob.digest)
# Mount the blob into the repository.
assert registry_model.mount_blob_into_repository(layer.blob, target_repository_ref, 60)
assert pre_oci_model.mount_blob_into_repository(layer.blob, target_repository_ref, 60)
# Ensure it now exists.
found = registry_model.get_repo_blob_by_digest(target_repository_ref, layer.blob.digest)
found = pre_oci_model.get_repo_blob_by_digest(target_repository_ref, layer.blob.digest)
assert found == layer.blob
@ -725,19 +726,21 @@ def test_get_cached_repo_blob(registry_model):
with patch('data.registry_model.registry_pre_oci_model.model.blob.get_repository_blob_by_digest',
fail):
# Make sure we can load again, which should hit the cache.
cached = registry_model.get_cached_repo_blob(model_cache, 'devtable', 'simple', blob.digest)
assert cached.digest == blob.digest
assert cached.uuid == blob.uuid
assert cached.compressed_size == blob.compressed_size
assert cached.uncompressed_size == blob.uncompressed_size
assert cached.uploading == blob.uploading
assert cached.placements == blob.placements
with patch('data.registry_model.registry_oci_model.model.oci.blob.get_repository_blob_by_digest',
fail):
# Make sure we can load again, which should hit the cache.
cached = registry_model.get_cached_repo_blob(model_cache, 'devtable', 'simple', blob.digest)
assert cached.digest == blob.digest
assert cached.uuid == blob.uuid
assert cached.compressed_size == blob.compressed_size
assert cached.uncompressed_size == blob.uncompressed_size
assert cached.uploading == blob.uploading
assert cached.placements == blob.placements
# Try another blob, which should fail since the DB is not connected and the cache
# does not contain the blob.
with pytest.raises(SomeException):
registry_model.get_cached_repo_blob(model_cache, 'devtable', 'simple', 'some other digest')
# Try another blob, which should fail since the DB is not connected and the cache
# does not contain the blob.
with pytest.raises(SomeException):
registry_model.get_cached_repo_blob(model_cache, 'devtable', 'simple', 'some other digest')
def test_create_manifest_and_retarget_tag(registry_model):

View file

@ -6,7 +6,7 @@ from image.docker.schema1 import (DockerSchema1ManifestBuilder,
DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE,
DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE)
from image.docker.schema2.manifest import (MalformedSchema2Manifest, DockerSchema2Manifest,
DockerSchema2ManifestBuilder, EMPTY_BLOB_DIGEST)
DockerSchema2ManifestBuilder, EMPTY_LAYER_BLOB_DIGEST)
from image.docker.schema2.test.test_config import CONFIG_BYTES
from image.docker.schemautil import ContentRetrieverForTesting
@ -279,7 +279,7 @@ def test_generate_legacy_layers():
legacy_layers = list(manifest.generate_legacy_layers({}, retriever))
assert len(legacy_layers) == 3
assert legacy_layers[0].content_checksum == 'sha256:abc123'
assert legacy_layers[1].content_checksum == EMPTY_BLOB_DIGEST
assert legacy_layers[1].content_checksum == EMPTY_LAYER_BLOB_DIGEST
assert legacy_layers[2].content_checksum == 'sha256:def456'
assert legacy_layers[0].created == "2018-04-03T18:37:09.284840891Z"

View file

@ -96,6 +96,9 @@ class BrokenManifest(ManifestInterface):
def has_legacy_image(self):
return False
def get_requires_empty_layer_blob(self, content_retriever):
return False
class ManifestBackfillWorker(Worker):
def __init__(self):