Simplifying queries around images and placements

Only verbs needs to load placements for multiple images, so we can vastly simplify and optimize most queries by making it two-step, and having the rest of the image loads not worry about placements
This commit is contained in:
Joseph Schorr 2018-04-02 14:16:43 -04:00
parent 0c7c9a7a0a
commit 8146646761
7 changed files with 78 additions and 141 deletions

View File

@ -1,9 +1,11 @@
import logging
import hashlib
import json
from collections import defaultdict
from datetime import datetime
import dateutil.parser
from datetime import datetime
from peewee import JOIN_LEFT_OUTER, IntegrityError, fn
from data.model import (DataModelException, db_transaction, _basequery, storage,
@ -31,23 +33,10 @@ def get_image_with_storage(docker_image_id, storage_uuid):
return None
def get_parent_images_with_placements(namespace_name, repository_name, image_obj):
""" Returns a list of parent Image objects starting with the most recent parent
and ending with the base layer. The images in this query will include the storage and
placements.
"""
return _get_parent_images(namespace_name, repository_name, image_obj, include_placements=True)
def get_parent_images(namespace_name, repository_name, image_obj):
""" Returns a list of parent Image objects starting with the most recent parent
and ending with the base layer. The images in this query will include the storage but
not the placements.
and ending with the base layer. The images in this query will include the storage.
"""
return _get_parent_images(namespace_name, repository_name, image_obj, include_placements=False)
def _get_parent_images(namespace_name, repository_name, image_obj, include_placements=False):
parents = image_obj.ancestors
# Ancestors are in the format /<root>/<intermediate>/.../<parent>/, with each path section
@ -59,12 +48,8 @@ def _get_parent_images(namespace_name, repository_name, image_obj, include_place
def filter_to_parents(query):
return query.where(Image.id << parent_db_ids)
if include_placements:
parents = get_repository_images_base(namespace_name, repository_name, filter_to_parents)
else:
parents = _get_repository_images_and_storages(namespace_name, repository_name,
filter_to_parents)
parents = _get_repository_images_and_storages(namespace_name, repository_name,
filter_to_parents)
id_to_image = {unicode(image.id): image for image in parents}
try:
return [id_to_image[parent_id] for parent_id in reversed(parent_db_ids)]
@ -72,7 +57,47 @@ def _get_parent_images(namespace_name, repository_name, image_obj, include_place
raise DataModelException('Unknown parent image')
def get_placements_for_images(images):
""" Returns the placements for the given images, as a map from image storage ID to placements. """
if not images:
return {}
query = (ImageStoragePlacement
.select(ImageStoragePlacement, ImageStorageLocation, ImageStorage)
.join(ImageStorageLocation)
.switch(ImageStoragePlacement)
.join(ImageStorage)
.where(ImageStorage.id << [image.storage_id for image in images]))
placement_map = defaultdict(list)
for placement in query:
placement_map[placement.storage.id].append(placement)
return dict(placement_map)
def get_image_and_placements(namespace_name, repo_name, docker_image_id):
""" Returns the repo image (with a storage object) and storage placements for the image
or (None, None) if non found.
"""
repo_image = get_repo_image_and_storage(namespace_name, repo_name, docker_image_id)
if repo_image is None:
return (None, None)
query = (ImageStoragePlacement
.select(ImageStoragePlacement, ImageStorageLocation)
.join(ImageStorageLocation)
.switch(ImageStoragePlacement)
.join(ImageStorage)
.where(ImageStorage.id == repo_image.storage_id))
return repo_image, list(query)
def get_repo_image(namespace_name, repository_name, docker_image_id):
""" Returns the repository image with the given Docker image ID or None if none.
Does not include the storage object.
"""
def limit_to_image_id(query):
return query.where(Image.docker_image_id == docker_image_id).limit(1)
@ -83,18 +108,10 @@ def get_repo_image(namespace_name, repository_name, docker_image_id):
return None
def get_repo_image_extended(namespace_name, repository_name, docker_image_id):
def limit_to_image_id(query):
return query.where(Image.docker_image_id == docker_image_id)
images = get_repository_images_base(namespace_name, repository_name, limit_to_image_id)
if not images:
return None
return images[0]
def get_repo_image_and_storage(namespace_name, repository_name, docker_image_id):
""" Returns the repository image with the given Docker image ID or None if none.
Includes the storage object.
"""
def limit_to_image_id(query):
return query.where(Image.docker_image_id == docker_image_id)
@ -105,6 +122,17 @@ def get_repo_image_and_storage(namespace_name, repository_name, docker_image_id)
return images[0]
def get_image_by_id(namespace_name, repository_name, docker_image_id):
""" Returns the repository image with the given Docker image ID or raises if not found.
Includes the storage object.
"""
image = get_repo_image_and_storage(namespace_name, repository_name, docker_image_id)
if not image:
raise InvalidImageException('Unable to find image \'%s\' for repo \'%s/%s\'' %
(docker_image_id, namespace_name, repository_name))
return image
def _get_repository_images_and_storages(namespace_name, repository_name, query_modifier):
query = (Image
.select(Image, ImageStorage)
@ -129,47 +157,6 @@ def _get_repository_images(namespace_name, repository_name, query_modifier):
return query
def get_repository_images_base(namespace_name, repository_name, query_modifier):
query = (ImageStoragePlacement
.select(ImageStoragePlacement, Image, ImageStorage, ImageStorageLocation)
.join(ImageStorageLocation)
.switch(ImageStoragePlacement)
.join(ImageStorage, JOIN_LEFT_OUTER)
.join(Image)
.join(Repository)
.join(Namespace, on=(Repository.namespace_user == Namespace.id))
.where(Repository.name == repository_name, Namespace.username == namespace_name))
query = query_modifier(query)
return invert_placement_query_results(query)
def invert_placement_query_results(placement_query):
""" This method will take a query which returns placements, storages, and images, and have it
return images and their storages, along with the placement set on each storage.
"""
location_list = list(placement_query)
images = {}
for location in location_list:
# Make sure we're always retrieving the same image object.
image = location.storage.image
# Set the storage to the one we got from the location, to prevent another query
image.storage = location.storage
if not image.id in images:
images[image.id] = image
image.storage.locations = set()
else:
image = images[image.id]
# Add the location to the image's locations set.
image.storage.locations.add(location.location.name)
return images.values()
def lookup_repository_images(repo, docker_image_ids):
return (Image
.select(Image, ImageStorage)
@ -177,13 +164,6 @@ def lookup_repository_images(repo, docker_image_ids):
.where(Image.repository == repo, Image.docker_image_id << docker_image_ids))
def get_matching_repository_images(namespace_name, repository_name, docker_image_ids):
def modify_query(query):
return query.where(Image.docker_image_id << list(docker_image_ids))
return get_repository_images_base(namespace_name, repository_name, modify_query)
def get_repository_images_without_placements(repo_obj, with_ancestor=None):
query = (Image
.select(Image, ImageStorage)
@ -199,15 +179,8 @@ def get_repository_images_without_placements(repo_obj, with_ancestor=None):
def get_repository_images(namespace_name, repository_name):
return get_repository_images_base(namespace_name, repository_name, lambda q: q)
def get_image_by_id(namespace_name, repository_name, docker_image_id):
image = get_repo_image_extended(namespace_name, repository_name, docker_image_id)
if not image:
raise InvalidImageException('Unable to find image \'%s\' for repo \'%s/%s\'' %
(docker_image_id, namespace_name, repository_name))
return image
""" Returns all the repository images in the repository. Does not include storage objects. """
return _get_repository_images(namespace_name, repository_name, lambda q: q)
def __translate_ancestry(old_ancestry, translations, repo_obj, username, preferred_location):
@ -253,8 +226,6 @@ def _find_or_link_image(existing_image, repo_obj, username, translations, prefer
username, preferred_location)
copied_storage = to_copy.storage
copied_storage.locations = {placement.location.name
for placement in copied_storage.imagestorageplacement_set}
translated_parent_id = None
if new_image_ancestry != '/':
@ -403,24 +374,6 @@ def get_repo_image_by_storage_checksum(namespace, repository_name, storage_check
raise InvalidImageException(msg)
def get_image_layers(image):
""" Returns a list of the full layers of an image, including itself (if specified), sorted
from base image outward. """
image_ids = image.ancestor_id_list() + [image.id]
query = (ImageStoragePlacement
.select(ImageStoragePlacement, Image, ImageStorage, ImageStorageLocation)
.join(ImageStorageLocation)
.switch(ImageStoragePlacement)
.join(ImageStorage, JOIN_LEFT_OUTER)
.join(Image)
.where(Image.id << image_ids))
image_list = list(invert_placement_query_results(query))
image_list.sort(key=lambda img: image_ids.index(img.id))
return image_list
def synthesize_v1_image(repo, image_storage_id, storage_image_size, docker_image_id,
created_date_str, comment, command, v1_json_metadata, parent_image=None):
""" Find an existing image with this docker image id, and if none exists, write one with the
@ -507,6 +460,7 @@ def get_image_with_storage_and_parent_base():
.join(Parent, JOIN_LEFT_OUTER, on=(Image.parent == Parent.id))
.join(ParentImageStorage, JOIN_LEFT_OUTER, on=(ParentImageStorage.id == Parent.storage)))
def set_secscan_status(image, indexed, version):
query = (Image
.select()

View File

@ -44,7 +44,7 @@ class PreOCIModel(ImageInterface):
return [_build_image(image) for image in all_images]
def get_repository_image(self, namespace_name, repo_name, docker_image_id):
image = model.image.get_repo_image_extended(namespace_name, repo_name, docker_image_id)
image = model.image.get_repo_image_and_storage(namespace_name, repo_name, docker_image_id)
if not image:
return None

View File

@ -11,10 +11,12 @@ class PreOCIModel(DockerRegistryV1DataInterface):
"""
def placement_locations_and_path_docker_v1(self, namespace_name, repo_name, image_id):
repo_image = model.image.get_repo_image_extended(namespace_name, repo_name, image_id)
if not repo_image or repo_image.storage is None:
image, placements = model.image.get_image_and_placements(namespace_name, repo_name, image_id)
if image is None:
return None, None
return repo_image.storage.locations, model.storage.get_layer_path(repo_image.storage)
locations = {placement.location.name for placement in placements}
return locations, model.storage.get_layer_path(image.storage)
def docker_v1_metadata(self, namespace_name, repo_name, image_id):
repo_image = model.image.get_repo_image(namespace_name, repo_name, image_id)

View File

@ -30,8 +30,9 @@ class PreOCIModel(VerbsDataInterface):
repo_image_record = model.image.get_image_by_id(
repo_image.repository.namespace_name, repo_image.repository.name, repo_image.image_id)
parents = model.image.get_parent_images_with_placements(
parents = model.image.get_parent_images(
repo_image.repository.namespace_name, repo_image.repository.name, repo_image_record)
placements_map = model.image.get_placements_for_images(parents)
yield repo_image
@ -42,9 +43,10 @@ class PreOCIModel(VerbsDataInterface):
except ValueError:
pass
locations = [placement.location.name for placement in placements_map[parent.storage.id]]
yield ImageWithBlob(
image_id=parent.docker_image_id,
blob=_blob(parent.storage),
blob=_blob(parent.storage, locations=locations),
repository=repo_image.repository,
compat_metadata=metadata,
v1_metadata=_docker_v1_metadata(repo_image.repository.namespace_name,
@ -175,15 +177,11 @@ def _derived_image(blob_record, repo_image):
internal_source_image_db_id=repo_image.internal_db_id,)
def _blob(blob_record):
def _blob(blob_record, locations=None):
"""
Returns a Blob object for the given Pre-OCI data model blob instance.
"""
if hasattr(blob_record, 'locations'):
locations = blob_record.locations
else:
locations = model.storage.get_storage_locations(blob_record.uuid)
locations = locations or model.storage.get_storage_locations(blob_record.uuid)
return Blob(
uuid=blob_record.uuid,
size=blob_record.image_size,

View File

@ -584,7 +584,7 @@ def populate_database(minimal=False, with_storage=False):
'Complex repository with many branches and tags.',
False, [(new_user_2, 'read'), (dtrobot[0], 'read')],
(2, [(3, [], 'v2.0'),
(1, [(1, [(1, [], ['prod'])],
(1, [(1, [(2, [], ['prod'])],
'staging'),
(1, [], None)], None)], None))

View File

@ -2829,7 +2829,7 @@ class TestListAndDeleteTag(ApiTestCase):
repository=ADMIN_ACCESS_USER + '/complex', tag='staging'))
staging_images = json['images']
assert len(prod_images) == len(staging_images) + 1
assert len(prod_images) == len(staging_images) + 2
# Delete prod.
self.deleteEmptyResponse(RepositoryTag,

View File

@ -58,7 +58,7 @@ class TestImageTree(unittest.TestCase):
return True
result = tree.find_longest_path(base_image.id, checker)
self.assertEquals(4, len(result))
self.assertEquals(5, len(result))
self.assertEquals('prod', tree.tag_containing_image(result[-1]))
def test_filtering(self):
@ -74,23 +74,6 @@ class TestImageTree(unittest.TestCase):
result = tree.find_longest_path(base_image.id, checker)
self.assertEquals(0, len(result))
def test_find_tag_parent_image(self):
all_images = list(model.image.get_repository_images(NAMESPACE, COMPLEX_REPO))
all_tags = list(model.tag.list_repository_tags(NAMESPACE, COMPLEX_REPO))
tree = ImageTree(all_images, all_tags)
base_image = self._get_base_image(all_images)
def checker(index, image):
return True
result = tree.find_longest_path(base_image.id, checker)
self.assertEquals(4, len(result))
# Only use the first two images. They don't have tags, but the method should
# still return the tag that contains them.
self.assertEquals('staging', tree.tag_containing_image(result[0]))
def test_longest_path_simple_repo_direct_lookup(self):
repository = model.repository.get_repository(NAMESPACE, SIMPLE_REPO)
all_images = list(model.image.get_repository_images(NAMESPACE, SIMPLE_REPO))