Merge pull request #923 from jakedt/v2performance

Improve the performance of fetching manifest blobs by checksum.
This commit is contained in:
Jake Moshenko 2015-11-19 11:05:44 -05:00
commit 9c8e1eb506
3 changed files with 30 additions and 17 deletions

View file

@ -1,6 +1,6 @@
import logging
from peewee import JOIN_LEFT_OUTER, fn
from peewee import JOIN_LEFT_OUTER, fn, SQL
from data.model import config, db_transaction, InvalidImageException
from data.database import (ImageStorage, Image, DerivedImageStorage, ImageStoragePlacement,
@ -218,13 +218,26 @@ def get_layer_path(storage_record):
return store.blob_path(storage_record.content_checksum)
def lookup_repo_storages_by_content_checksum(repo, checksums):
""" Looks up repository storages (without placements) matching the given repository
and checksum. """
# NOTE(review): this span reads like a rendered diff whose +/- markers were stripped. The
# four-line query directly below and the union-building loop after it each end in a
# `return`; presumably the first is the removed body and the loop is its replacement.
# Confirm against the repository before treating either one as live code.
#
# Single-query form: selects ImageStorage rows joined to Image, filtered to `repo` and to
# content checksums contained in `checksums` (`<<` is peewee's IN operator). A storage
# joined to several images in the repository would appear once per joined image.
return (ImageStorage
.select()
.join(Image)
.where(Image.repository == repo, ImageStorage.content_checksum << checksums))
# There may be many duplicates of the checksums, so for performance reasons we are going
# to use a union to select just one storage with each checksum
queries = []
# set() collapses repeated checksums so each distinct value yields exactly one subquery.
for checksum in set(checksums):
# At most one (id, content_checksum) row per checksum. No ordering is applied before
# limit(1), so which matching storage is returned is left to the database.
candidate_subq = (ImageStorage
.select(ImageStorage.id, ImageStorage.content_checksum)
.join(Image)
.where(Image.repository == repo, ImageStorage.content_checksum == checksum)
.limit(1))
# Wrap the limited query as a derived table so it can take part in the union below. The
# outer `*` only exposes the two columns the subquery selected.
queries.append(ImageStorage
.select(SQL('*'))
.from_(candidate_subq))
# Fold the per-checksum queries into a single UNION ALL query.
# NOTE(review): with an empty `checksums`, `queries` is empty and reduce() without an
# initial value raises TypeError -- confirm callers never pass an empty collection.
# NOTE(review): `reduce` is only a builtin on Python 2; on Python 3 it must be imported
# from functools, and no such import is visible in this view.
return reduce(lambda l, r: l.union_all(r), queries)
def get_storage_locations(uuid):
query = (ImageStoragePlacement

View file

@ -240,10 +240,10 @@ def load_manifest_by_digest(namespace, repo_name, digest):
def _load_repo_manifests(namespace, repo_name):
# NOTE(review): two near-identical `return` statements follow, which looks like a rendered
# diff whose +/- markers were stripped. Presumably the bare query is the removed version and
# the `_tag_alive(...)` form is its replacement -- confirm against the repository.
#
# Selects TagManifest rows together with their RepositoryTag, joining onward through Image,
# Repository and Namespace so the rows can be filtered by repository name (`repo_name`) and
# namespace username (`namespace`). The query is returned unevaluated.
return (TagManifest
.select(TagManifest, RepositoryTag)
.join(RepositoryTag)
.join(Image)
.join(Repository)
.join(Namespace, on=(Namespace.id == Repository.namespace_user))
.where(Repository.name == repo_name, Namespace.username == namespace))
# Same query, passed through `_tag_alive` (defined outside this view) before being returned.
# Presumably that helper restricts the result to tags that have not expired -- verify
# against its definition.
return _tag_alive(TagManifest
.select(TagManifest, RepositoryTag)
.join(RepositoryTag)
.join(Image)
.join(Repository)
.join(Namespace, on=(Namespace.id == Repository.namespace_user))
.where(Repository.name == repo_name, Namespace.username == namespace))

View file

@ -287,16 +287,16 @@ def _write_manifest(namespace, repo_name, manifest):
# know which V1 images we need to synthesize and which ones are invalid.
layers = list(manifest.layers)
docker_image_ids = [mdata.v1_metadata.docker_id for mdata in layers]
parent_image_ids = [mdata.v1_metadata.parent for mdata in layers
if mdata.v1_metadata.parent]
all_image_ids = list(set(docker_image_ids + parent_image_ids))
docker_image_ids = {mdata.v1_metadata.docker_id for mdata in layers}
parent_image_ids = {mdata.v1_metadata.parent for mdata in layers
if mdata.v1_metadata.parent}
all_image_ids = list(docker_image_ids | parent_image_ids)
images_query = model.image.lookup_repository_images(repo, all_image_ids)
images_map = {image.docker_image_id: image for image in images_query}
# Lookup the storages associated with each blob in the manifest.
checksums = [str(mdata.digest) for mdata in manifest.layers]
checksums = list({str(mdata.digest) for mdata in manifest.layers})
storage_query = model.storage.lookup_repo_storages_by_content_checksum(repo, checksums)
storage_map = {storage.content_checksum: storage for storage in storage_query}