Merge pull request #923 from jakedt/v2performance
Improve the performance of fetching manifest blobs by checksum.
This commit is contained in:
commit
9c8e1eb506
3 changed files with 30 additions and 17 deletions
|
@ -1,6 +1,6 @@
|
|||
import logging
|
||||
|
||||
from peewee import JOIN_LEFT_OUTER, fn
|
||||
from peewee import JOIN_LEFT_OUTER, fn, SQL
|
||||
|
||||
from data.model import config, db_transaction, InvalidImageException
|
||||
from data.database import (ImageStorage, Image, DerivedImageStorage, ImageStoragePlacement,
|
||||
|
@ -218,13 +218,26 @@ def get_layer_path(storage_record):
|
|||
|
||||
return store.blob_path(storage_record.content_checksum)
|
||||
|
||||
|
||||
def lookup_repo_storages_by_content_checksum(repo, checksums):
|
||||
""" Looks up repository storages (without placements) matching the given repository
|
||||
and checksums. """
|
||||
return (ImageStorage
|
||||
.select()
|
||||
.join(Image)
|
||||
.where(Image.repository == repo, ImageStorage.content_checksum << checksums))
|
||||
# There may be many duplicate rows for each checksum, so for performance reasons we are going
|
||||
# to use a union to select just one storage with each checksum
|
||||
queries = []
|
||||
|
||||
for checksum in set(checksums):
|
||||
candidate_subq = (ImageStorage
|
||||
.select(ImageStorage.id, ImageStorage.content_checksum)
|
||||
.join(Image)
|
||||
.where(Image.repository == repo, ImageStorage.content_checksum == checksum)
|
||||
.limit(1))
|
||||
queries.append(ImageStorage
|
||||
.select(SQL('*'))
|
||||
.from_(candidate_subq))
|
||||
|
||||
return reduce(lambda l, r: l.union_all(r), queries)
|
||||
|
||||
|
||||
def get_storage_locations(uuid):
|
||||
query = (ImageStoragePlacement
|
||||
|
|
|
@ -240,10 +240,10 @@ def load_manifest_by_digest(namespace, repo_name, digest):
|
|||
|
||||
|
||||
def _load_repo_manifests(namespace, repo_name):
|
||||
return (TagManifest
|
||||
.select(TagManifest, RepositoryTag)
|
||||
.join(RepositoryTag)
|
||||
.join(Image)
|
||||
.join(Repository)
|
||||
.join(Namespace, on=(Namespace.id == Repository.namespace_user))
|
||||
.where(Repository.name == repo_name, Namespace.username == namespace))
|
||||
return _tag_alive(TagManifest
|
||||
.select(TagManifest, RepositoryTag)
|
||||
.join(RepositoryTag)
|
||||
.join(Image)
|
||||
.join(Repository)
|
||||
.join(Namespace, on=(Namespace.id == Repository.namespace_user))
|
||||
.where(Repository.name == repo_name, Namespace.username == namespace))
|
||||
|
|
|
@ -287,16 +287,16 @@ def _write_manifest(namespace, repo_name, manifest):
|
|||
# know which V1 images we need to synthesize and which ones are invalid.
|
||||
layers = list(manifest.layers)
|
||||
|
||||
docker_image_ids = [mdata.v1_metadata.docker_id for mdata in layers]
|
||||
parent_image_ids = [mdata.v1_metadata.parent for mdata in layers
|
||||
if mdata.v1_metadata.parent]
|
||||
all_image_ids = list(set(docker_image_ids + parent_image_ids))
|
||||
docker_image_ids = {mdata.v1_metadata.docker_id for mdata in layers}
|
||||
parent_image_ids = {mdata.v1_metadata.parent for mdata in layers
|
||||
if mdata.v1_metadata.parent}
|
||||
all_image_ids = list(docker_image_ids | parent_image_ids)
|
||||
|
||||
images_query = model.image.lookup_repository_images(repo, all_image_ids)
|
||||
images_map = {image.docker_image_id: image for image in images_query}
|
||||
|
||||
# Lookup the storages associated with each blob in the manifest.
|
||||
checksums = [str(mdata.digest) for mdata in manifest.layers]
|
||||
checksums = list({str(mdata.digest) for mdata in manifest.layers})
|
||||
storage_query = model.storage.lookup_repo_storages_by_content_checksum(repo, checksums)
|
||||
storage_map = {storage.content_checksum: storage for storage in storage_query}
|
||||
|
||||
|
|
Reference in a new issue