Optimize lookup of shared global blobs
Currently, we only have one (the shared empty layer), but this should make the blob lookups for repositories significantly faster, as we won't need to do the massive join.
This commit is contained in:
parent
7beac643ec
commit
f75f315037
6 changed files with 78 additions and 24 deletions
|
@ -8,6 +8,7 @@ from data import database
|
|||
from data import model
|
||||
from data.cache import cache_key
|
||||
from data.model.oci.retriever import RepositoryContentRetriever
|
||||
from data.model.blob import get_shared_blob
|
||||
from data.registry_model.datatype import FromDictionaryException
|
||||
from data.registry_model.datatypes import (RepositoryReference, Blob, TorrentInfo, BlobUpload,
|
||||
LegacyImage, ManifestLayer, DerivedImage)
|
||||
|
@ -323,9 +324,8 @@ class SharedModel:
|
|||
if not len(local_blob_digests):
|
||||
return []
|
||||
|
||||
blob_query = model.storage.lookup_repo_storages_by_content_checksum(repo_id,
|
||||
local_blob_digests,
|
||||
by_manifest=by_manifest)
|
||||
blob_query = self._lookup_repo_storages_by_content_checksum(repo_id, local_blob_digests,
|
||||
by_manifest=by_manifest)
|
||||
blobs = []
|
||||
for image_storage in blob_query:
|
||||
placements = None
|
||||
|
@ -356,9 +356,8 @@ class SharedModel:
|
|||
blob_digests.append(EMPTY_LAYER_BLOB_DIGEST)
|
||||
|
||||
if blob_digests:
|
||||
blob_query = model.storage.lookup_repo_storages_by_content_checksum(repo_id,
|
||||
blob_digests,
|
||||
by_manifest=by_manifest)
|
||||
blob_query = self._lookup_repo_storages_by_content_checksum(repo_id, blob_digests,
|
||||
by_manifest=by_manifest)
|
||||
storage_map = {blob.content_checksum: blob for blob in blob_query}
|
||||
|
||||
|
||||
|
@ -441,3 +440,29 @@ class SharedModel:
|
|||
|
||||
# Sign the manifest with our signing key.
|
||||
return builder.build(docker_v2_signing_key)
|
||||
|
||||
def _get_shared_storage(self, blob_digest):
    """ Returns the globally shared ImageStorage row for the given blob digest, or None if the
        digest does not name a globally shared storage.
    """
    # The empty layer is currently the only globally shared blob. The database holds many
    # duplicate copies of it, so a lookup bound to a repository can be incredibly slow; since
    # the layer is shared by definition, it is fetched directly instead.
    is_empty_layer = blob_digest == EMPTY_LAYER_BLOB_DIGEST
    if not is_empty_layer:
        return None

    return get_shared_blob(EMPTY_LAYER_BLOB_DIGEST)
|
||||
|
||||
def _lookup_repo_storages_by_content_checksum(self, repo, checksums, by_manifest=False):
    """ Looks up the storages matching the given content checksums.

        Checksums that name a globally shared storage (as reported by `_get_shared_storage`)
        are resolved directly; all remaining checksums are looked up against the repository via
        `model.storage.lookup_repo_storages_by_content_checksum`.

        `checksums` may be any iterable and is never modified. Returns a list containing the
        repository-scoped storages followed by the shared storages.
    """
    # Partition into fresh lists rather than removing entries from `checksums` in place:
    # callers pass in their own digest lists, and removing the shared digests from them
    # would silently change the caller's data.
    extra_storages = []
    repo_checksums = []
    for checksum in checksums:
        shared_storage = self._get_shared_storage(checksum)
        if shared_storage is not None:
            extra_storages.append(shared_storage)
        else:
            repo_checksums.append(checksum)

    # Skip the repository-bound query entirely when every checksum was a shared storage.
    found = []
    if repo_checksums:
        found = list(model.storage.lookup_repo_storages_by_content_checksum(
            repo, repo_checksums, by_manifest=by_manifest))

    return found + extra_storages
|
||||
|
|
Reference in a new issue