Read blobs from new manifest blob table where relevant
This commit is contained in:
parent
4985040d31
commit
adccdd30ca
9 changed files with 146 additions and 62 deletions
|
@ -1,6 +1,7 @@
|
||||||
# There MUST NOT be any circular dependencies between these subsections. If there are fix it by
|
# There MUST NOT be any circular dependencies between these subsections. If there are fix it by
|
||||||
# moving the minimal number of things to shared
|
# moving the minimal number of things to shared
|
||||||
from data.model.oci import (
|
from data.model.oci import (
|
||||||
|
blob,
|
||||||
label,
|
label,
|
||||||
manifest,
|
manifest,
|
||||||
shared,
|
shared,
|
||||||
|
|
26
data/model/oci/blob.py
Normal file
26
data/model/oci/blob.py
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
from data.database import ImageStorage, ManifestBlob
|
||||||
|
from data.model import BlobDoesNotExist
|
||||||
|
from data.model.storage import get_storage_by_subquery, InvalidImageException
|
||||||
|
from data.model.blob import get_repository_blob_by_digest as legacy_get
|
||||||
|
|
||||||
|
def get_repository_blob_by_digest(repository, blob_digest):
|
||||||
|
""" Find the content-addressable blob linked to the specified repository and
|
||||||
|
returns it or None if none.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
storage_id_query = (ImageStorage
|
||||||
|
.select(ImageStorage.id)
|
||||||
|
.join(ManifestBlob)
|
||||||
|
.where(ManifestBlob.repository == repository,
|
||||||
|
ImageStorage.content_checksum == blob_digest,
|
||||||
|
ImageStorage.uploading == False)
|
||||||
|
.limit(1))
|
||||||
|
|
||||||
|
return get_storage_by_subquery(storage_id_query)
|
||||||
|
except InvalidImageException:
|
||||||
|
# TODO(jschorr): Remove once we are no longer using the legacy tables.
|
||||||
|
# Try the legacy call.
|
||||||
|
try:
|
||||||
|
return legacy_get(repository, blob_digest)
|
||||||
|
except BlobDoesNotExist:
|
||||||
|
return None
|
|
@ -270,7 +270,7 @@ def get_layer_path_for_storage(storage_uuid, cas_path, content_checksum):
|
||||||
return store.blob_path(content_checksum)
|
return store.blob_path(content_checksum)
|
||||||
|
|
||||||
|
|
||||||
def lookup_repo_storages_by_content_checksum(repo, checksums):
|
def lookup_repo_storages_by_content_checksum(repo, checksums, by_manifest=False):
|
||||||
""" Looks up repository storages (without placements) matching the given repository
|
""" Looks up repository storages (without placements) matching the given repository
|
||||||
and checksum. """
|
and checksum. """
|
||||||
# There may be many duplicates of the checksums, so for performance reasons we are going
|
# There may be many duplicates of the checksums, so for performance reasons we are going
|
||||||
|
@ -279,14 +279,29 @@ def lookup_repo_storages_by_content_checksum(repo, checksums):
|
||||||
|
|
||||||
for counter, checksum in enumerate(set(checksums)):
|
for counter, checksum in enumerate(set(checksums)):
|
||||||
query_alias = 'q{0}'.format(counter)
|
query_alias = 'q{0}'.format(counter)
|
||||||
candidate_subq = (ImageStorage
|
|
||||||
.select(ImageStorage.id, ImageStorage.content_checksum,
|
# TODO(jschorr): Remove once we have a new-style model for tracking temp uploaded blobs and
|
||||||
ImageStorage.image_size, ImageStorage.uuid, ImageStorage.cas_path,
|
# all legacy tables have been removed.
|
||||||
ImageStorage.uncompressed_size, ImageStorage.uploading)
|
if by_manifest:
|
||||||
.join(Image)
|
candidate_subq = (ImageStorage
|
||||||
.where(Image.repository == repo, ImageStorage.content_checksum == checksum)
|
.select(ImageStorage.id, ImageStorage.content_checksum,
|
||||||
.limit(1)
|
ImageStorage.image_size, ImageStorage.uuid, ImageStorage.cas_path,
|
||||||
.alias(query_alias))
|
ImageStorage.uncompressed_size, ImageStorage.uploading)
|
||||||
|
.join(ManifestBlob)
|
||||||
|
.where(ManifestBlob.repository == repo,
|
||||||
|
ImageStorage.content_checksum == checksum)
|
||||||
|
.limit(1)
|
||||||
|
.alias(query_alias))
|
||||||
|
else:
|
||||||
|
candidate_subq = (ImageStorage
|
||||||
|
.select(ImageStorage.id, ImageStorage.content_checksum,
|
||||||
|
ImageStorage.image_size, ImageStorage.uuid, ImageStorage.cas_path,
|
||||||
|
ImageStorage.uncompressed_size, ImageStorage.uploading)
|
||||||
|
.join(Image)
|
||||||
|
.where(Image.repository == repo, ImageStorage.content_checksum == checksum)
|
||||||
|
.limit(1)
|
||||||
|
.alias(query_alias))
|
||||||
|
|
||||||
queries.append(ImageStorage
|
queries.append(ImageStorage
|
||||||
.select(SQL('*'))
|
.select(SQL('*'))
|
||||||
.from_(candidate_subq))
|
.from_(candidate_subq))
|
||||||
|
|
|
@ -9,7 +9,8 @@ from data.model import oci, DataModelException
|
||||||
from data.model.oci.retriever import RepositoryContentRetriever
|
from data.model.oci.retriever import RepositoryContentRetriever
|
||||||
from data.database import db_transaction, Image
|
from data.database import db_transaction, Image
|
||||||
from data.registry_model.interface import RegistryDataInterface
|
from data.registry_model.interface import RegistryDataInterface
|
||||||
from data.registry_model.datatypes import Tag, Manifest, LegacyImage, Label, SecurityScanStatus
|
from data.registry_model.datatypes import (Tag, Manifest, LegacyImage, Label, SecurityScanStatus,
|
||||||
|
Blob)
|
||||||
from data.registry_model.shared import SharedModel
|
from data.registry_model.shared import SharedModel
|
||||||
from data.registry_model.label_handlers import apply_label_to_manifest
|
from data.registry_model.label_handlers import apply_label_to_manifest
|
||||||
from image.docker import ManifestException
|
from image.docker import ManifestException
|
||||||
|
@ -430,7 +431,8 @@ class OCIModel(SharedModel, RegistryDataInterface):
|
||||||
logger.exception('Could not parse and validate manifest `%s`', manifest._db_id)
|
logger.exception('Could not parse and validate manifest `%s`', manifest._db_id)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
return self._list_manifest_layers(manifest_obj.repository_id, parsed, include_placements)
|
return self._list_manifest_layers(manifest_obj.repository_id, parsed, include_placements,
|
||||||
|
by_manifest=True)
|
||||||
|
|
||||||
def lookup_derived_image(self, manifest, verb, varying_metadata=None, include_placements=False):
|
def lookup_derived_image(self, manifest, verb, varying_metadata=None, include_placements=False):
|
||||||
"""
|
"""
|
||||||
|
@ -501,5 +503,33 @@ class OCIModel(SharedModel, RegistryDataInterface):
|
||||||
li = LegacyImage.for_image(legacy_image)
|
li = LegacyImage.for_image(legacy_image)
|
||||||
return Manifest.for_manifest(created_manifest.manifest, li)
|
return Manifest.for_manifest(created_manifest.manifest, li)
|
||||||
|
|
||||||
|
def get_repo_blob_by_digest(self, repository_ref, blob_digest, include_placements=False):
|
||||||
|
"""
|
||||||
|
Returns the blob in the repository with the given digest, if any or None if none. Note that
|
||||||
|
there may be multiple records in the same repository for the same blob digest, so the return
|
||||||
|
value of this function may change.
|
||||||
|
"""
|
||||||
|
image_storage = oci.blob.get_repository_blob_by_digest(repository_ref._db_id, blob_digest)
|
||||||
|
if image_storage is None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
assert image_storage.cas_path is not None
|
||||||
|
|
||||||
|
placements = None
|
||||||
|
if include_placements:
|
||||||
|
placements = list(model.storage.get_storage_locations(image_storage.uuid))
|
||||||
|
|
||||||
|
return Blob.for_image_storage(image_storage,
|
||||||
|
storage_path=model.storage.get_layer_path(image_storage),
|
||||||
|
placements=placements)
|
||||||
|
|
||||||
|
def list_parsed_manifest_layers(self, repository_ref, parsed_manifest, include_placements=False):
|
||||||
|
""" Returns an *ordered list* of the layers found in the parsed manifest, starting at the base
|
||||||
|
and working towards the leaf, including the associated Blob and its placements
|
||||||
|
(if specified).
|
||||||
|
"""
|
||||||
|
return self._list_manifest_layers(repository_ref._db_id, parsed_manifest, include_placements,
|
||||||
|
by_manifest=True)
|
||||||
|
|
||||||
|
|
||||||
oci_model = OCIModel()
|
oci_model = OCIModel()
|
||||||
|
|
|
@ -556,5 +556,33 @@ class PreOCIModel(SharedModel, RegistryDataInterface):
|
||||||
"""
|
"""
|
||||||
raise NotImplementedError('Unsupported in pre OCI model')
|
raise NotImplementedError('Unsupported in pre OCI model')
|
||||||
|
|
||||||
|
def get_repo_blob_by_digest(self, repository_ref, blob_digest, include_placements=False):
|
||||||
|
"""
|
||||||
|
Returns the blob in the repository with the given digest, if any or None if none. Note that
|
||||||
|
there may be multiple records in the same repository for the same blob digest, so the return
|
||||||
|
value of this function may change.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
image_storage = model.blob.get_repository_blob_by_digest(repository_ref._db_id, blob_digest)
|
||||||
|
except model.BlobDoesNotExist:
|
||||||
|
return None
|
||||||
|
|
||||||
|
assert image_storage.cas_path is not None
|
||||||
|
|
||||||
|
placements = None
|
||||||
|
if include_placements:
|
||||||
|
placements = list(model.storage.get_storage_locations(image_storage.uuid))
|
||||||
|
|
||||||
|
return Blob.for_image_storage(image_storage,
|
||||||
|
storage_path=model.storage.get_layer_path(image_storage),
|
||||||
|
placements=placements)
|
||||||
|
|
||||||
|
def list_parsed_manifest_layers(self, repository_ref, parsed_manifest, include_placements=False):
|
||||||
|
""" Returns an *ordered list* of the layers found in the parsed manifest, starting at the base
|
||||||
|
and working towards the leaf, including the associated Blob and its placements
|
||||||
|
(if specified).
|
||||||
|
"""
|
||||||
|
return self._list_manifest_layers(repository_ref._db_id, parsed_manifest, include_placements)
|
||||||
|
|
||||||
|
|
||||||
pre_oci_model = PreOCIModel()
|
pre_oci_model = PreOCIModel()
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
# pylint: disable=protected-access
|
# pylint: disable=protected-access
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
|
from abc import abstractmethod
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
|
||||||
from data import database
|
from data import database
|
||||||
|
@ -149,26 +150,9 @@ class SharedModel:
|
||||||
|
|
||||||
return self.get_repo_blob_by_digest(repository_ref, blob_digest, include_placements=True)
|
return self.get_repo_blob_by_digest(repository_ref, blob_digest, include_placements=True)
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
def get_repo_blob_by_digest(self, repository_ref, blob_digest, include_placements=False):
|
def get_repo_blob_by_digest(self, repository_ref, blob_digest, include_placements=False):
|
||||||
"""
|
pass
|
||||||
Returns the blob in the repository with the given digest, if any or None if none. Note that
|
|
||||||
there may be multiple records in the same repository for the same blob digest, so the return
|
|
||||||
value of this function may change.
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
image_storage = model.blob.get_repository_blob_by_digest(repository_ref._db_id, blob_digest)
|
|
||||||
except model.BlobDoesNotExist:
|
|
||||||
return None
|
|
||||||
|
|
||||||
assert image_storage.cas_path is not None
|
|
||||||
|
|
||||||
placements = None
|
|
||||||
if include_placements:
|
|
||||||
placements = list(model.storage.get_storage_locations(image_storage.uuid))
|
|
||||||
|
|
||||||
return Blob.for_image_storage(image_storage,
|
|
||||||
storage_path=model.storage.get_layer_path(image_storage),
|
|
||||||
placements=placements)
|
|
||||||
|
|
||||||
def create_blob_upload(self, repository_ref, new_upload_id, location_name, storage_metadata):
|
def create_blob_upload(self, repository_ref, new_upload_id, location_name, storage_metadata):
|
||||||
""" Creates a new blob upload and returns a reference. If the blob upload could not be
|
""" Creates a new blob upload and returns a reference. If the blob upload could not be
|
||||||
|
@ -306,14 +290,7 @@ class SharedModel:
|
||||||
|
|
||||||
return LegacyImage.for_image(image, images_map=parent_images_map, blob=blob)
|
return LegacyImage.for_image(image, images_map=parent_images_map, blob=blob)
|
||||||
|
|
||||||
def list_parsed_manifest_layers(self, repository_ref, parsed_manifest, include_placements=False):
|
def _list_manifest_layers(self, repo_id, parsed, include_placements=False, by_manifest=False):
|
||||||
""" Returns an *ordered list* of the layers found in the parsed manifest, starting at the base
|
|
||||||
and working towards the leaf, including the associated Blob and its placements
|
|
||||||
(if specified).
|
|
||||||
"""
|
|
||||||
return self._list_manifest_layers(repository_ref._db_id, parsed_manifest, include_placements)
|
|
||||||
|
|
||||||
def _list_manifest_layers(self, repo_id, parsed, include_placements=False):
|
|
||||||
""" Returns an *ordered list* of the layers found in the manifest, starting at the base and
|
""" Returns an *ordered list* of the layers found in the manifest, starting at the base and
|
||||||
working towards the leaf, including the associated Blob and its placements (if specified).
|
working towards the leaf, including the associated Blob and its placements (if specified).
|
||||||
Returns None if the manifest could not be parsed and validated.
|
Returns None if the manifest could not be parsed and validated.
|
||||||
|
@ -321,7 +298,8 @@ class SharedModel:
|
||||||
storage_map = {}
|
storage_map = {}
|
||||||
if parsed.local_blob_digests:
|
if parsed.local_blob_digests:
|
||||||
blob_query = model.storage.lookup_repo_storages_by_content_checksum(repo_id,
|
blob_query = model.storage.lookup_repo_storages_by_content_checksum(repo_id,
|
||||||
parsed.local_blob_digests)
|
parsed.local_blob_digests,
|
||||||
|
by_manifest=by_manifest)
|
||||||
storage_map = {blob.content_checksum: blob for blob in blob_query}
|
storage_map = {blob.content_checksum: blob for blob in blob_query}
|
||||||
|
|
||||||
manifest_layers = []
|
manifest_layers = []
|
||||||
|
|
|
@ -672,25 +672,26 @@ def test_commit_blob_upload(registry_model):
|
||||||
assert not registry_model.lookup_blob_upload(repository_ref, blob_upload.upload_id)
|
assert not registry_model.lookup_blob_upload(repository_ref, blob_upload.upload_id)
|
||||||
|
|
||||||
|
|
||||||
def test_mount_blob_into_repository(registry_model):
|
# TODO(jschorr): Re-enable for OCI model once we have a new table for temporary blobs.
|
||||||
repository_ref = registry_model.lookup_repository('devtable', 'simple')
|
def test_mount_blob_into_repository(pre_oci_model):
|
||||||
latest_tag = registry_model.get_repo_tag(repository_ref, 'latest')
|
repository_ref = pre_oci_model.lookup_repository('devtable', 'simple')
|
||||||
manifest = registry_model.get_manifest_for_tag(latest_tag)
|
latest_tag = pre_oci_model.get_repo_tag(repository_ref, 'latest')
|
||||||
|
manifest = pre_oci_model.get_manifest_for_tag(latest_tag)
|
||||||
|
|
||||||
target_repository_ref = registry_model.lookup_repository('devtable', 'complex')
|
target_repository_ref = pre_oci_model.lookup_repository('devtable', 'complex')
|
||||||
|
|
||||||
layers = registry_model.list_manifest_layers(manifest, include_placements=True)
|
layers = pre_oci_model.list_manifest_layers(manifest, include_placements=True)
|
||||||
assert layers
|
assert layers
|
||||||
|
|
||||||
for layer in layers:
|
for layer in layers:
|
||||||
# Ensure the blob doesn't exist under the repository.
|
# Ensure the blob doesn't exist under the repository.
|
||||||
assert not registry_model.get_repo_blob_by_digest(target_repository_ref, layer.blob.digest)
|
assert not pre_oci_model.get_repo_blob_by_digest(target_repository_ref, layer.blob.digest)
|
||||||
|
|
||||||
# Mount the blob into the repository.
|
# Mount the blob into the repository.
|
||||||
assert registry_model.mount_blob_into_repository(layer.blob, target_repository_ref, 60)
|
assert pre_oci_model.mount_blob_into_repository(layer.blob, target_repository_ref, 60)
|
||||||
|
|
||||||
# Ensure it now exists.
|
# Ensure it now exists.
|
||||||
found = registry_model.get_repo_blob_by_digest(target_repository_ref, layer.blob.digest)
|
found = pre_oci_model.get_repo_blob_by_digest(target_repository_ref, layer.blob.digest)
|
||||||
assert found == layer.blob
|
assert found == layer.blob
|
||||||
|
|
||||||
|
|
||||||
|
@ -725,19 +726,21 @@ def test_get_cached_repo_blob(registry_model):
|
||||||
|
|
||||||
with patch('data.registry_model.registry_pre_oci_model.model.blob.get_repository_blob_by_digest',
|
with patch('data.registry_model.registry_pre_oci_model.model.blob.get_repository_blob_by_digest',
|
||||||
fail):
|
fail):
|
||||||
# Make sure we can load again, which should hit the cache.
|
with patch('data.registry_model.registry_oci_model.model.oci.blob.get_repository_blob_by_digest',
|
||||||
cached = registry_model.get_cached_repo_blob(model_cache, 'devtable', 'simple', blob.digest)
|
fail):
|
||||||
assert cached.digest == blob.digest
|
# Make sure we can load again, which should hit the cache.
|
||||||
assert cached.uuid == blob.uuid
|
cached = registry_model.get_cached_repo_blob(model_cache, 'devtable', 'simple', blob.digest)
|
||||||
assert cached.compressed_size == blob.compressed_size
|
assert cached.digest == blob.digest
|
||||||
assert cached.uncompressed_size == blob.uncompressed_size
|
assert cached.uuid == blob.uuid
|
||||||
assert cached.uploading == blob.uploading
|
assert cached.compressed_size == blob.compressed_size
|
||||||
assert cached.placements == blob.placements
|
assert cached.uncompressed_size == blob.uncompressed_size
|
||||||
|
assert cached.uploading == blob.uploading
|
||||||
|
assert cached.placements == blob.placements
|
||||||
|
|
||||||
# Try another blob, which should fail since the DB is not connected and the cache
|
# Try another blob, which should fail since the DB is not connected and the cache
|
||||||
# does not contain the blob.
|
# does not contain the blob.
|
||||||
with pytest.raises(SomeException):
|
with pytest.raises(SomeException):
|
||||||
registry_model.get_cached_repo_blob(model_cache, 'devtable', 'simple', 'some other digest')
|
registry_model.get_cached_repo_blob(model_cache, 'devtable', 'simple', 'some other digest')
|
||||||
|
|
||||||
|
|
||||||
def test_create_manifest_and_retarget_tag(registry_model):
|
def test_create_manifest_and_retarget_tag(registry_model):
|
||||||
|
|
|
@ -6,7 +6,7 @@ from image.docker.schema1 import (DockerSchema1ManifestBuilder,
|
||||||
DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE,
|
DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE,
|
||||||
DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE)
|
DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE)
|
||||||
from image.docker.schema2.manifest import (MalformedSchema2Manifest, DockerSchema2Manifest,
|
from image.docker.schema2.manifest import (MalformedSchema2Manifest, DockerSchema2Manifest,
|
||||||
DockerSchema2ManifestBuilder, EMPTY_BLOB_DIGEST)
|
DockerSchema2ManifestBuilder, EMPTY_LAYER_BLOB_DIGEST)
|
||||||
from image.docker.schema2.test.test_config import CONFIG_BYTES
|
from image.docker.schema2.test.test_config import CONFIG_BYTES
|
||||||
from image.docker.schemautil import ContentRetrieverForTesting
|
from image.docker.schemautil import ContentRetrieverForTesting
|
||||||
|
|
||||||
|
@ -279,7 +279,7 @@ def test_generate_legacy_layers():
|
||||||
legacy_layers = list(manifest.generate_legacy_layers({}, retriever))
|
legacy_layers = list(manifest.generate_legacy_layers({}, retriever))
|
||||||
assert len(legacy_layers) == 3
|
assert len(legacy_layers) == 3
|
||||||
assert legacy_layers[0].content_checksum == 'sha256:abc123'
|
assert legacy_layers[0].content_checksum == 'sha256:abc123'
|
||||||
assert legacy_layers[1].content_checksum == EMPTY_BLOB_DIGEST
|
assert legacy_layers[1].content_checksum == EMPTY_LAYER_BLOB_DIGEST
|
||||||
assert legacy_layers[2].content_checksum == 'sha256:def456'
|
assert legacy_layers[2].content_checksum == 'sha256:def456'
|
||||||
|
|
||||||
assert legacy_layers[0].created == "2018-04-03T18:37:09.284840891Z"
|
assert legacy_layers[0].created == "2018-04-03T18:37:09.284840891Z"
|
||||||
|
|
|
@ -96,6 +96,9 @@ class BrokenManifest(ManifestInterface):
|
||||||
def has_legacy_image(self):
|
def has_legacy_image(self):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def get_requires_empty_layer_blob(self, content_retriever):
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
class ManifestBackfillWorker(Worker):
|
class ManifestBackfillWorker(Worker):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
|
Reference in a new issue