Optimize lookup of shared global blobs
Currently, we only have one (the shared empty layer), but this should make the blob lookups for repositories significantly faster, as we won't need to do the massive join.
This commit is contained in:
parent
7beac643ec
commit
f75f315037
6 changed files with 78 additions and 24 deletions
|
@ -7,7 +7,7 @@ from peewee import IntegrityError, JOIN
|
|||
from data.database import (Tag, Manifest, ManifestBlob, ManifestLegacyImage, ManifestChild,
|
||||
db_transaction)
|
||||
from data.model import BlobDoesNotExist
|
||||
from data.model.blob import get_or_create_shared_blob
|
||||
from data.model.blob import get_or_create_shared_blob, get_shared_blob
|
||||
from data.model.oci.tag import filter_to_alive_tags
|
||||
from data.model.oci.label import create_manifest_label
|
||||
from data.model.oci.retriever import RepositoryContentRetriever
|
||||
|
@ -108,9 +108,20 @@ def _create_manifest(repository_id, manifest_interface_instance, storage):
|
|||
# Ensure all the blobs in the manifest exist.
|
||||
digests = set(manifest_interface_instance.local_blob_digests)
|
||||
blob_map = {}
|
||||
|
||||
# If the special empty layer is required, simply load it directly. This is much faster
|
||||
# than trying to load it on a per repository basis, and that is unnecessary anyway since
|
||||
# this layer is predefined.
|
||||
if EMPTY_LAYER_BLOB_DIGEST in digests:
|
||||
digests.remove(EMPTY_LAYER_BLOB_DIGEST)
|
||||
blob_map[EMPTY_LAYER_BLOB_DIGEST] = get_shared_blob(EMPTY_LAYER_BLOB_DIGEST)
|
||||
if not blob_map[EMPTY_LAYER_BLOB_DIGEST]:
|
||||
logger.warning('Could not find the special empty blob in storage')
|
||||
return None
|
||||
|
||||
if digests:
|
||||
query = lookup_repo_storages_by_content_checksum(repository_id, digests)
|
||||
blob_map = {s.content_checksum: s for s in query}
|
||||
blob_map.update({s.content_checksum: s for s in query})
|
||||
for digest_str in digests:
|
||||
if digest_str not in blob_map:
|
||||
logger.warning('Unknown blob `%s` under manifest `%s` for repository `%s`', digest_str,
|
||||
|
@ -120,11 +131,12 @@ def _create_manifest(repository_id, manifest_interface_instance, storage):
|
|||
# Special check: If the empty layer blob is needed for this manifest, add it to the
|
||||
# blob map. This is necessary because Docker decided to elide sending of this special
|
||||
# empty layer in schema version 2, but we need to have it referenced for GC and schema version 1.
|
||||
if manifest_interface_instance.get_requires_empty_layer_blob(retriever):
|
||||
shared_blob = get_or_create_shared_blob(EMPTY_LAYER_BLOB_DIGEST, EMPTY_LAYER_BYTES, storage)
|
||||
assert not shared_blob.uploading
|
||||
assert shared_blob.content_checksum == EMPTY_LAYER_BLOB_DIGEST
|
||||
blob_map[EMPTY_LAYER_BLOB_DIGEST] = shared_blob
|
||||
if EMPTY_LAYER_BLOB_DIGEST not in blob_map:
|
||||
if manifest_interface_instance.get_requires_empty_layer_blob(retriever):
|
||||
shared_blob = get_or_create_shared_blob(EMPTY_LAYER_BLOB_DIGEST, EMPTY_LAYER_BYTES, storage)
|
||||
assert not shared_blob.uploading
|
||||
assert shared_blob.content_checksum == EMPTY_LAYER_BLOB_DIGEST
|
||||
blob_map[EMPTY_LAYER_BLOB_DIGEST] = shared_blob
|
||||
|
||||
# Determine and populate the legacy image if necessary. Manifest lists will not have a legacy
|
||||
# image.
|
||||
|
|
Reference in a new issue