Merge pull request #3362 from quay/shared-layer-fixes
Shared blob fixes and optimizations
This commit is contained in:
commit
39db907172
9 changed files with 121 additions and 26 deletions
|
@ -164,6 +164,18 @@ def initiate_upload(namespace, repo_name, uuid, location_name, storage_metadata)
|
|||
storage_metadata=storage_metadata)
|
||||
|
||||
|
||||
def get_shared_blob(digest):
|
||||
""" Returns the ImageStorage blob with the given digest or, if not present,
|
||||
returns None. This method is *only* to be used for shared blobs that are
|
||||
globally accessible, such as the special empty gzipped tar layer that Docker
|
||||
no longer pushes to us.
|
||||
"""
|
||||
try:
|
||||
return ImageStorage.get(content_checksum=digest, uploading=False)
|
||||
except ImageStorage.DoesNotExist:
|
||||
return None
|
||||
|
||||
|
||||
def get_or_create_shared_blob(digest, byte_data, storage):
|
||||
""" Returns the ImageStorage blob with the given digest or, if not present,
|
||||
adds a row and writes the given byte data to the storage engine.
|
||||
|
|
|
@ -7,7 +7,7 @@ from peewee import IntegrityError, JOIN
|
|||
from data.database import (Tag, Manifest, ManifestBlob, ManifestLegacyImage, ManifestChild,
|
||||
db_transaction)
|
||||
from data.model import BlobDoesNotExist
|
||||
from data.model.blob import get_or_create_shared_blob
|
||||
from data.model.blob import get_or_create_shared_blob, get_shared_blob
|
||||
from data.model.oci.tag import filter_to_alive_tags
|
||||
from data.model.oci.label import create_manifest_label
|
||||
from data.model.oci.retriever import RepositoryContentRetriever
|
||||
|
@ -108,9 +108,20 @@ def _create_manifest(repository_id, manifest_interface_instance, storage):
|
|||
# Ensure all the blobs in the manifest exist.
|
||||
digests = set(manifest_interface_instance.local_blob_digests)
|
||||
blob_map = {}
|
||||
|
||||
# If the special empty layer is required, simply load it directly. This is much faster
|
||||
# than trying to load it on a per repository basis, and that is unnecessary anyway since
|
||||
# this layer is predefined.
|
||||
if EMPTY_LAYER_BLOB_DIGEST in digests:
|
||||
digests.remove(EMPTY_LAYER_BLOB_DIGEST)
|
||||
blob_map[EMPTY_LAYER_BLOB_DIGEST] = get_shared_blob(EMPTY_LAYER_BLOB_DIGEST)
|
||||
if not blob_map[EMPTY_LAYER_BLOB_DIGEST]:
|
||||
logger.warning('Could not find the special empty blob in storage')
|
||||
return None
|
||||
|
||||
if digests:
|
||||
query = lookup_repo_storages_by_content_checksum(repository_id, digests)
|
||||
blob_map = {s.content_checksum: s for s in query}
|
||||
blob_map.update({s.content_checksum: s for s in query})
|
||||
for digest_str in digests:
|
||||
if digest_str not in blob_map:
|
||||
logger.warning('Unknown blob `%s` under manifest `%s` for repository `%s`', digest_str,
|
||||
|
@ -120,11 +131,12 @@ def _create_manifest(repository_id, manifest_interface_instance, storage):
|
|||
# Special check: If the empty layer blob is needed for this manifest, add it to the
|
||||
# blob map. This is necessary because Docker decided to elide sending of this special
|
||||
# empty layer in schema version 2, but we need to have it referenced for GC and schema version 1.
|
||||
if manifest_interface_instance.get_requires_empty_layer_blob(retriever):
|
||||
shared_blob = get_or_create_shared_blob(EMPTY_LAYER_BLOB_DIGEST, EMPTY_LAYER_BYTES, storage)
|
||||
assert not shared_blob.uploading
|
||||
assert shared_blob.content_checksum == EMPTY_LAYER_BLOB_DIGEST
|
||||
blob_map[EMPTY_LAYER_BLOB_DIGEST] = shared_blob
|
||||
if EMPTY_LAYER_BLOB_DIGEST not in blob_map:
|
||||
if manifest_interface_instance.get_requires_empty_layer_blob(retriever):
|
||||
shared_blob = get_or_create_shared_blob(EMPTY_LAYER_BLOB_DIGEST, EMPTY_LAYER_BYTES, storage)
|
||||
assert not shared_blob.uploading
|
||||
assert shared_blob.content_checksum == EMPTY_LAYER_BLOB_DIGEST
|
||||
blob_map[EMPTY_LAYER_BLOB_DIGEST] = shared_blob
|
||||
|
||||
# Determine and populate the legacy image if necessary. Manifest lists will not have a legacy
|
||||
# image.
|
||||
|
|
|
@ -264,6 +264,9 @@ def get_layer_path_for_storage(storage_uuid, cas_path, content_checksum):
|
|||
def lookup_repo_storages_by_content_checksum(repo, checksums, by_manifest=False):
|
||||
""" Looks up repository storages (without placements) matching the given repository
|
||||
and checksum. """
|
||||
if not checksums:
|
||||
return []
|
||||
|
||||
# There may be many duplicates of the checksums, so for performance reasons we are going
|
||||
# to use a union to select just one storage with each checksum
|
||||
queries = []
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
from app import storage
|
||||
from data import model, database
|
||||
|
||||
from test.fixtures import *
|
||||
|
@ -30,3 +31,19 @@ def test_store_blob(initialized_db):
|
|||
assert blob_storage3.id != blob_storage.id
|
||||
assert blob_storage3.image_size == 1234
|
||||
assert blob_storage3.uncompressed_size == 5678
|
||||
|
||||
|
||||
def test_get_or_create_shared_blob(initialized_db):
|
||||
shared = model.blob.get_or_create_shared_blob('sha256:abcdef', 'somecontent', storage)
|
||||
assert shared.content_checksum == 'sha256:abcdef'
|
||||
|
||||
again = model.blob.get_or_create_shared_blob('sha256:abcdef', 'somecontent', storage)
|
||||
assert shared == again
|
||||
|
||||
|
||||
def test_lookup_repo_storages_by_content_checksum(initialized_db):
|
||||
for image in database.Image.select():
|
||||
found = model.storage.lookup_repo_storages_by_content_checksum(image.repository,
|
||||
[image.storage.content_checksum])
|
||||
assert len(found) == 1
|
||||
assert found[0].content_checksum == image.storage.content_checksum
|
||||
|
|
|
@ -565,11 +565,13 @@ class OCIModel(SharedModel, RegistryDataInterface):
|
|||
there may be multiple records in the same repository for the same blob digest, so the return
|
||||
value of this function may change.
|
||||
"""
|
||||
image_storage = oci.blob.get_repository_blob_by_digest(repository_ref._db_id, blob_digest)
|
||||
image_storage = self._get_shared_storage(blob_digest)
|
||||
if image_storage is None:
|
||||
return None
|
||||
image_storage = oci.blob.get_repository_blob_by_digest(repository_ref._db_id, blob_digest)
|
||||
if image_storage is None:
|
||||
return None
|
||||
|
||||
assert image_storage.cas_path is not None
|
||||
assert image_storage.cas_path is not None
|
||||
|
||||
placements = None
|
||||
if include_placements:
|
||||
|
|
|
@ -120,7 +120,7 @@ class PreOCIModel(SharedModel, RegistryDataInterface):
|
|||
|
||||
# Ensure all the blobs in the manifest exist.
|
||||
digests = manifest_interface_instance.checksums
|
||||
query = model.storage.lookup_repo_storages_by_content_checksum(repository_ref._db_id, digests)
|
||||
query = self._lookup_repo_storages_by_content_checksum(repository_ref._db_id, digests)
|
||||
blob_map = {s.content_checksum: s for s in query}
|
||||
for layer in manifest_interface_instance.layers:
|
||||
digest_str = str(layer.digest)
|
||||
|
@ -481,9 +481,7 @@ class PreOCIModel(SharedModel, RegistryDataInterface):
|
|||
if manifest is None:
|
||||
return None
|
||||
|
||||
blob_query = model.storage.lookup_repo_storages_by_content_checksum(repo,
|
||||
manifest.checksums)
|
||||
|
||||
blob_query = self._lookup_repo_storages_by_content_checksum(repo, manifest.checksums)
|
||||
storage_map = {blob.content_checksum: blob.id for blob in blob_query}
|
||||
try:
|
||||
tag_manifest, _ = model.tag.associate_generated_tag_manifest_with_tag(tag_obj, manifest,
|
||||
|
@ -585,10 +583,12 @@ class PreOCIModel(SharedModel, RegistryDataInterface):
|
|||
there may be multiple records in the same repository for the same blob digest, so the return
|
||||
value of this function may change.
|
||||
"""
|
||||
try:
|
||||
image_storage = model.blob.get_repository_blob_by_digest(repository_ref._db_id, blob_digest)
|
||||
except model.BlobDoesNotExist:
|
||||
return None
|
||||
image_storage = self._get_shared_storage(blob_digest)
|
||||
if image_storage is None:
|
||||
try:
|
||||
image_storage = model.blob.get_repository_blob_by_digest(repository_ref._db_id, blob_digest)
|
||||
except model.BlobDoesNotExist:
|
||||
return None
|
||||
|
||||
assert image_storage.cas_path is not None
|
||||
|
||||
|
|
|
@ -8,6 +8,7 @@ from data import database
|
|||
from data import model
|
||||
from data.cache import cache_key
|
||||
from data.model.oci.retriever import RepositoryContentRetriever
|
||||
from data.model.blob import get_shared_blob
|
||||
from data.registry_model.datatype import FromDictionaryException
|
||||
from data.registry_model.datatypes import (RepositoryReference, Blob, TorrentInfo, BlobUpload,
|
||||
LegacyImage, ManifestLayer, DerivedImage)
|
||||
|
@ -323,9 +324,8 @@ class SharedModel:
|
|||
if not len(local_blob_digests):
|
||||
return []
|
||||
|
||||
blob_query = model.storage.lookup_repo_storages_by_content_checksum(repo_id,
|
||||
local_blob_digests,
|
||||
by_manifest=by_manifest)
|
||||
blob_query = self._lookup_repo_storages_by_content_checksum(repo_id, local_blob_digests,
|
||||
by_manifest=by_manifest)
|
||||
blobs = []
|
||||
for image_storage in blob_query:
|
||||
placements = None
|
||||
|
@ -356,9 +356,8 @@ class SharedModel:
|
|||
blob_digests.append(EMPTY_LAYER_BLOB_DIGEST)
|
||||
|
||||
if blob_digests:
|
||||
blob_query = model.storage.lookup_repo_storages_by_content_checksum(repo_id,
|
||||
blob_digests,
|
||||
by_manifest=by_manifest)
|
||||
blob_query = self._lookup_repo_storages_by_content_checksum(repo_id, blob_digests,
|
||||
by_manifest=by_manifest)
|
||||
storage_map = {blob.content_checksum: blob for blob in blob_query}
|
||||
|
||||
|
||||
|
@ -441,3 +440,31 @@ class SharedModel:
|
|||
|
||||
# Sign the manifest with our signing key.
|
||||
return builder.build(docker_v2_signing_key)
|
||||
|
||||
def _get_shared_storage(self, blob_digest):
|
||||
""" Returns an ImageStorage row for the blob digest if it is a globally shared storage. """
|
||||
# If the EMPTY_LAYER_BLOB_DIGEST is in the checksums, look it up directly. Since we have
|
||||
# so many duplicate copies in the database currently, looking it up bound to a repository
|
||||
# can be incredibly slow, and, since it is defined as a globally shared layer, this is extra
|
||||
# work we don't need to do.
|
||||
if blob_digest == EMPTY_LAYER_BLOB_DIGEST:
|
||||
return get_shared_blob(EMPTY_LAYER_BLOB_DIGEST)
|
||||
|
||||
return None
|
||||
|
||||
def _lookup_repo_storages_by_content_checksum(self, repo, checksums, by_manifest=False):
|
||||
checksums = set(checksums)
|
||||
|
||||
# Load any shared storages first.
|
||||
extra_storages = []
|
||||
for checksum in list(checksums):
|
||||
shared_storage = self._get_shared_storage(checksum)
|
||||
if shared_storage is not None:
|
||||
extra_storages.append(shared_storage)
|
||||
checksums.remove(checksum)
|
||||
|
||||
found = []
|
||||
if checksums:
|
||||
found = list(model.storage.lookup_repo_storages_by_content_checksum(repo, checksums,
|
||||
by_manifest=by_manifest))
|
||||
return found + extra_storages
|
||||
|
|
|
@ -132,7 +132,8 @@ def _try_to_mount_blob(repository_ref, mount_blob_digest):
|
|||
return None
|
||||
|
||||
# Lookup if the mount blob's digest exists in the repository.
|
||||
mount_blob = registry_model.get_repo_blob_by_digest(from_repository_ref, mount_blob_digest)
|
||||
mount_blob = registry_model.get_cached_repo_blob(model_cache, from_namespace, from_repo_name,
|
||||
mount_blob_digest)
|
||||
if mount_blob is None:
|
||||
logger.debug('Blob `%s` under repository `%s` not found', mount_blob_digest, from_repo)
|
||||
return None
|
||||
|
|
|
@ -1204,7 +1204,8 @@ def test_blob_mounting(push_user, push_namespace, push_repo, mount_repo_name, ex
|
|||
options.mount_blobs = {'sha256:' + hashlib.sha256(image.bytes).hexdigest(): mount_repo_name
|
||||
for image in basic_images}
|
||||
|
||||
manifest_protocol.push(liveserver_session, 'devtable', 'newrepo', 'latest', basic_images,
|
||||
manifest_protocol.push(liveserver_session, 'devtable', 'newrepo', 'latest',
|
||||
basic_images,
|
||||
credentials=('devtable', 'password'),
|
||||
options=options,
|
||||
expected_failure=expected_failure)
|
||||
|
@ -1215,6 +1216,26 @@ def test_blob_mounting(push_user, push_namespace, push_repo, mount_repo_name, ex
|
|||
credentials=('devtable', 'password'))
|
||||
|
||||
|
||||
def test_blob_mounting_with_empty_layers(manifest_protocol, pusher, puller, images_with_empty_layer,
|
||||
liveserver_session, app_reloader):
|
||||
# Push an image so we can attempt to mount it.
|
||||
pusher.push(liveserver_session, 'devtable', 'simple', 'latest', images_with_empty_layer,
|
||||
credentials=('devtable', 'password'))
|
||||
|
||||
# Push again, trying to mount the image layer(s) from the mount repo.
|
||||
options = ProtocolOptions()
|
||||
options.scopes = ['repository:devtable/newrepo:push,pull',
|
||||
'repository:%s:pull' % ('devtable/simple')]
|
||||
options.mount_blobs = {'sha256:' + hashlib.sha256(image.bytes).hexdigest(): 'devtable/simple'
|
||||
for image in images_with_empty_layer}
|
||||
options.skip_head_checks = True
|
||||
|
||||
manifest_protocol.push(liveserver_session, 'devtable', 'newrepo', 'latest',
|
||||
images_with_empty_layer,
|
||||
credentials=('devtable', 'password'),
|
||||
options=options)
|
||||
|
||||
|
||||
def get_robot_password(api_caller):
|
||||
api_caller.conduct_auth('devtable', 'password')
|
||||
resp = api_caller.get('/api/v1/organization/buynlarge/robots/ownerbot')
|
||||
|
|
Reference in a new issue