Simplifying queries around images and placements
Only verbs needs to load placements for multiple images, so we can vastly simplify and optimize most queries by making it two-step, and having the rest of the image loads not worry about placements
This commit is contained in:
parent
0c7c9a7a0a
commit
8146646761
7 changed files with 78 additions and 141 deletions
|
@ -1,9 +1,11 @@
|
|||
import logging
|
||||
import hashlib
|
||||
import json
|
||||
|
||||
from collections import defaultdict
|
||||
from datetime import datetime
|
||||
import dateutil.parser
|
||||
|
||||
from datetime import datetime
|
||||
from peewee import JOIN_LEFT_OUTER, IntegrityError, fn
|
||||
|
||||
from data.model import (DataModelException, db_transaction, _basequery, storage,
|
||||
|
@ -31,23 +33,10 @@ def get_image_with_storage(docker_image_id, storage_uuid):
|
|||
return None
|
||||
|
||||
|
||||
def get_parent_images_with_placements(namespace_name, repository_name, image_obj):
|
||||
""" Returns a list of parent Image objects starting with the most recent parent
|
||||
and ending with the base layer. The images in this query will include the storage and
|
||||
placements.
|
||||
"""
|
||||
return _get_parent_images(namespace_name, repository_name, image_obj, include_placements=True)
|
||||
|
||||
|
||||
def get_parent_images(namespace_name, repository_name, image_obj):
|
||||
""" Returns a list of parent Image objects starting with the most recent parent
|
||||
and ending with the base layer. The images in this query will include the storage but
|
||||
not the placements.
|
||||
and ending with the base layer. The images in this query will include the storage.
|
||||
"""
|
||||
return _get_parent_images(namespace_name, repository_name, image_obj, include_placements=False)
|
||||
|
||||
|
||||
def _get_parent_images(namespace_name, repository_name, image_obj, include_placements=False):
|
||||
parents = image_obj.ancestors
|
||||
|
||||
# Ancestors are in the format /<root>/<intermediate>/.../<parent>/, with each path section
|
||||
|
@ -59,12 +48,8 @@ def _get_parent_images(namespace_name, repository_name, image_obj, include_place
|
|||
def filter_to_parents(query):
|
||||
return query.where(Image.id << parent_db_ids)
|
||||
|
||||
if include_placements:
|
||||
parents = get_repository_images_base(namespace_name, repository_name, filter_to_parents)
|
||||
else:
|
||||
parents = _get_repository_images_and_storages(namespace_name, repository_name,
|
||||
filter_to_parents)
|
||||
|
||||
id_to_image = {unicode(image.id): image for image in parents}
|
||||
try:
|
||||
return [id_to_image[parent_id] for parent_id in reversed(parent_db_ids)]
|
||||
|
@ -72,7 +57,47 @@ def _get_parent_images(namespace_name, repository_name, image_obj, include_place
|
|||
raise DataModelException('Unknown parent image')
|
||||
|
||||
|
||||
def get_placements_for_images(images):
|
||||
""" Returns the placements for the given images, as a map from image storage ID to placements. """
|
||||
if not images:
|
||||
return {}
|
||||
|
||||
query = (ImageStoragePlacement
|
||||
.select(ImageStoragePlacement, ImageStorageLocation, ImageStorage)
|
||||
.join(ImageStorageLocation)
|
||||
.switch(ImageStoragePlacement)
|
||||
.join(ImageStorage)
|
||||
.where(ImageStorage.id << [image.storage_id for image in images]))
|
||||
|
||||
placement_map = defaultdict(list)
|
||||
for placement in query:
|
||||
placement_map[placement.storage.id].append(placement)
|
||||
|
||||
return dict(placement_map)
|
||||
|
||||
|
||||
def get_image_and_placements(namespace_name, repo_name, docker_image_id):
|
||||
""" Returns the repo image (with a storage object) and storage placements for the image
|
||||
or (None, None) if non found.
|
||||
"""
|
||||
repo_image = get_repo_image_and_storage(namespace_name, repo_name, docker_image_id)
|
||||
if repo_image is None:
|
||||
return (None, None)
|
||||
|
||||
query = (ImageStoragePlacement
|
||||
.select(ImageStoragePlacement, ImageStorageLocation)
|
||||
.join(ImageStorageLocation)
|
||||
.switch(ImageStoragePlacement)
|
||||
.join(ImageStorage)
|
||||
.where(ImageStorage.id == repo_image.storage_id))
|
||||
|
||||
return repo_image, list(query)
|
||||
|
||||
|
||||
def get_repo_image(namespace_name, repository_name, docker_image_id):
|
||||
""" Returns the repository image with the given Docker image ID or None if none.
|
||||
Does not include the storage object.
|
||||
"""
|
||||
def limit_to_image_id(query):
|
||||
return query.where(Image.docker_image_id == docker_image_id).limit(1)
|
||||
|
||||
|
@ -83,18 +108,10 @@ def get_repo_image(namespace_name, repository_name, docker_image_id):
|
|||
return None
|
||||
|
||||
|
||||
def get_repo_image_extended(namespace_name, repository_name, docker_image_id):
|
||||
def limit_to_image_id(query):
|
||||
return query.where(Image.docker_image_id == docker_image_id)
|
||||
|
||||
images = get_repository_images_base(namespace_name, repository_name, limit_to_image_id)
|
||||
if not images:
|
||||
return None
|
||||
|
||||
return images[0]
|
||||
|
||||
|
||||
def get_repo_image_and_storage(namespace_name, repository_name, docker_image_id):
|
||||
""" Returns the repository image with the given Docker image ID or None if none.
|
||||
Includes the storage object.
|
||||
"""
|
||||
def limit_to_image_id(query):
|
||||
return query.where(Image.docker_image_id == docker_image_id)
|
||||
|
||||
|
@ -105,6 +122,17 @@ def get_repo_image_and_storage(namespace_name, repository_name, docker_image_id)
|
|||
return images[0]
|
||||
|
||||
|
||||
def get_image_by_id(namespace_name, repository_name, docker_image_id):
|
||||
""" Returns the repository image with the given Docker image ID or raises if not found.
|
||||
Includes the storage object.
|
||||
"""
|
||||
image = get_repo_image_and_storage(namespace_name, repository_name, docker_image_id)
|
||||
if not image:
|
||||
raise InvalidImageException('Unable to find image \'%s\' for repo \'%s/%s\'' %
|
||||
(docker_image_id, namespace_name, repository_name))
|
||||
return image
|
||||
|
||||
|
||||
def _get_repository_images_and_storages(namespace_name, repository_name, query_modifier):
|
||||
query = (Image
|
||||
.select(Image, ImageStorage)
|
||||
|
@ -129,47 +157,6 @@ def _get_repository_images(namespace_name, repository_name, query_modifier):
|
|||
return query
|
||||
|
||||
|
||||
def get_repository_images_base(namespace_name, repository_name, query_modifier):
|
||||
query = (ImageStoragePlacement
|
||||
.select(ImageStoragePlacement, Image, ImageStorage, ImageStorageLocation)
|
||||
.join(ImageStorageLocation)
|
||||
.switch(ImageStoragePlacement)
|
||||
.join(ImageStorage, JOIN_LEFT_OUTER)
|
||||
.join(Image)
|
||||
.join(Repository)
|
||||
.join(Namespace, on=(Repository.namespace_user == Namespace.id))
|
||||
.where(Repository.name == repository_name, Namespace.username == namespace_name))
|
||||
|
||||
query = query_modifier(query)
|
||||
return invert_placement_query_results(query)
|
||||
|
||||
|
||||
def invert_placement_query_results(placement_query):
|
||||
""" This method will take a query which returns placements, storages, and images, and have it
|
||||
return images and their storages, along with the placement set on each storage.
|
||||
"""
|
||||
location_list = list(placement_query)
|
||||
|
||||
images = {}
|
||||
for location in location_list:
|
||||
# Make sure we're always retrieving the same image object.
|
||||
image = location.storage.image
|
||||
|
||||
# Set the storage to the one we got from the location, to prevent another query
|
||||
image.storage = location.storage
|
||||
|
||||
if not image.id in images:
|
||||
images[image.id] = image
|
||||
image.storage.locations = set()
|
||||
else:
|
||||
image = images[image.id]
|
||||
|
||||
# Add the location to the image's locations set.
|
||||
image.storage.locations.add(location.location.name)
|
||||
|
||||
return images.values()
|
||||
|
||||
|
||||
def lookup_repository_images(repo, docker_image_ids):
|
||||
return (Image
|
||||
.select(Image, ImageStorage)
|
||||
|
@ -177,13 +164,6 @@ def lookup_repository_images(repo, docker_image_ids):
|
|||
.where(Image.repository == repo, Image.docker_image_id << docker_image_ids))
|
||||
|
||||
|
||||
def get_matching_repository_images(namespace_name, repository_name, docker_image_ids):
|
||||
def modify_query(query):
|
||||
return query.where(Image.docker_image_id << list(docker_image_ids))
|
||||
|
||||
return get_repository_images_base(namespace_name, repository_name, modify_query)
|
||||
|
||||
|
||||
def get_repository_images_without_placements(repo_obj, with_ancestor=None):
|
||||
query = (Image
|
||||
.select(Image, ImageStorage)
|
||||
|
@ -199,15 +179,8 @@ def get_repository_images_without_placements(repo_obj, with_ancestor=None):
|
|||
|
||||
|
||||
def get_repository_images(namespace_name, repository_name):
|
||||
return get_repository_images_base(namespace_name, repository_name, lambda q: q)
|
||||
|
||||
|
||||
def get_image_by_id(namespace_name, repository_name, docker_image_id):
|
||||
image = get_repo_image_extended(namespace_name, repository_name, docker_image_id)
|
||||
if not image:
|
||||
raise InvalidImageException('Unable to find image \'%s\' for repo \'%s/%s\'' %
|
||||
(docker_image_id, namespace_name, repository_name))
|
||||
return image
|
||||
""" Returns all the repository images in the repository. Does not include storage objects. """
|
||||
return _get_repository_images(namespace_name, repository_name, lambda q: q)
|
||||
|
||||
|
||||
def __translate_ancestry(old_ancestry, translations, repo_obj, username, preferred_location):
|
||||
|
@ -253,8 +226,6 @@ def _find_or_link_image(existing_image, repo_obj, username, translations, prefer
|
|||
username, preferred_location)
|
||||
|
||||
copied_storage = to_copy.storage
|
||||
copied_storage.locations = {placement.location.name
|
||||
for placement in copied_storage.imagestorageplacement_set}
|
||||
|
||||
translated_parent_id = None
|
||||
if new_image_ancestry != '/':
|
||||
|
@ -403,24 +374,6 @@ def get_repo_image_by_storage_checksum(namespace, repository_name, storage_check
|
|||
raise InvalidImageException(msg)
|
||||
|
||||
|
||||
def get_image_layers(image):
|
||||
""" Returns a list of the full layers of an image, including itself (if specified), sorted
|
||||
from base image outward. """
|
||||
image_ids = image.ancestor_id_list() + [image.id]
|
||||
|
||||
query = (ImageStoragePlacement
|
||||
.select(ImageStoragePlacement, Image, ImageStorage, ImageStorageLocation)
|
||||
.join(ImageStorageLocation)
|
||||
.switch(ImageStoragePlacement)
|
||||
.join(ImageStorage, JOIN_LEFT_OUTER)
|
||||
.join(Image)
|
||||
.where(Image.id << image_ids))
|
||||
|
||||
image_list = list(invert_placement_query_results(query))
|
||||
image_list.sort(key=lambda img: image_ids.index(img.id))
|
||||
return image_list
|
||||
|
||||
|
||||
def synthesize_v1_image(repo, image_storage_id, storage_image_size, docker_image_id,
|
||||
created_date_str, comment, command, v1_json_metadata, parent_image=None):
|
||||
""" Find an existing image with this docker image id, and if none exists, write one with the
|
||||
|
@ -507,6 +460,7 @@ def get_image_with_storage_and_parent_base():
|
|||
.join(Parent, JOIN_LEFT_OUTER, on=(Image.parent == Parent.id))
|
||||
.join(ParentImageStorage, JOIN_LEFT_OUTER, on=(ParentImageStorage.id == Parent.storage)))
|
||||
|
||||
|
||||
def set_secscan_status(image, indexed, version):
|
||||
query = (Image
|
||||
.select()
|
||||
|
|
|
@ -44,7 +44,7 @@ class PreOCIModel(ImageInterface):
|
|||
return [_build_image(image) for image in all_images]
|
||||
|
||||
def get_repository_image(self, namespace_name, repo_name, docker_image_id):
|
||||
image = model.image.get_repo_image_extended(namespace_name, repo_name, docker_image_id)
|
||||
image = model.image.get_repo_image_and_storage(namespace_name, repo_name, docker_image_id)
|
||||
if not image:
|
||||
return None
|
||||
|
||||
|
|
|
@ -11,10 +11,12 @@ class PreOCIModel(DockerRegistryV1DataInterface):
|
|||
"""
|
||||
|
||||
def placement_locations_and_path_docker_v1(self, namespace_name, repo_name, image_id):
|
||||
repo_image = model.image.get_repo_image_extended(namespace_name, repo_name, image_id)
|
||||
if not repo_image or repo_image.storage is None:
|
||||
image, placements = model.image.get_image_and_placements(namespace_name, repo_name, image_id)
|
||||
if image is None:
|
||||
return None, None
|
||||
return repo_image.storage.locations, model.storage.get_layer_path(repo_image.storage)
|
||||
|
||||
locations = {placement.location.name for placement in placements}
|
||||
return locations, model.storage.get_layer_path(image.storage)
|
||||
|
||||
def docker_v1_metadata(self, namespace_name, repo_name, image_id):
|
||||
repo_image = model.image.get_repo_image(namespace_name, repo_name, image_id)
|
||||
|
|
|
@ -30,8 +30,9 @@ class PreOCIModel(VerbsDataInterface):
|
|||
repo_image_record = model.image.get_image_by_id(
|
||||
repo_image.repository.namespace_name, repo_image.repository.name, repo_image.image_id)
|
||||
|
||||
parents = model.image.get_parent_images_with_placements(
|
||||
parents = model.image.get_parent_images(
|
||||
repo_image.repository.namespace_name, repo_image.repository.name, repo_image_record)
|
||||
placements_map = model.image.get_placements_for_images(parents)
|
||||
|
||||
yield repo_image
|
||||
|
||||
|
@ -42,9 +43,10 @@ class PreOCIModel(VerbsDataInterface):
|
|||
except ValueError:
|
||||
pass
|
||||
|
||||
locations = [placement.location.name for placement in placements_map[parent.storage.id]]
|
||||
yield ImageWithBlob(
|
||||
image_id=parent.docker_image_id,
|
||||
blob=_blob(parent.storage),
|
||||
blob=_blob(parent.storage, locations=locations),
|
||||
repository=repo_image.repository,
|
||||
compat_metadata=metadata,
|
||||
v1_metadata=_docker_v1_metadata(repo_image.repository.namespace_name,
|
||||
|
@ -175,15 +177,11 @@ def _derived_image(blob_record, repo_image):
|
|||
internal_source_image_db_id=repo_image.internal_db_id,)
|
||||
|
||||
|
||||
def _blob(blob_record):
|
||||
def _blob(blob_record, locations=None):
|
||||
"""
|
||||
Returns a Blob object for the given Pre-OCI data model blob instance.
|
||||
"""
|
||||
if hasattr(blob_record, 'locations'):
|
||||
locations = blob_record.locations
|
||||
else:
|
||||
locations = model.storage.get_storage_locations(blob_record.uuid)
|
||||
|
||||
locations = locations or model.storage.get_storage_locations(blob_record.uuid)
|
||||
return Blob(
|
||||
uuid=blob_record.uuid,
|
||||
size=blob_record.image_size,
|
||||
|
|
|
@ -584,7 +584,7 @@ def populate_database(minimal=False, with_storage=False):
|
|||
'Complex repository with many branches and tags.',
|
||||
False, [(new_user_2, 'read'), (dtrobot[0], 'read')],
|
||||
(2, [(3, [], 'v2.0'),
|
||||
(1, [(1, [(1, [], ['prod'])],
|
||||
(1, [(1, [(2, [], ['prod'])],
|
||||
'staging'),
|
||||
(1, [], None)], None)], None))
|
||||
|
||||
|
|
|
@ -2829,7 +2829,7 @@ class TestListAndDeleteTag(ApiTestCase):
|
|||
repository=ADMIN_ACCESS_USER + '/complex', tag='staging'))
|
||||
|
||||
staging_images = json['images']
|
||||
assert len(prod_images) == len(staging_images) + 1
|
||||
assert len(prod_images) == len(staging_images) + 2
|
||||
|
||||
# Delete prod.
|
||||
self.deleteEmptyResponse(RepositoryTag,
|
||||
|
|
|
@ -58,7 +58,7 @@ class TestImageTree(unittest.TestCase):
|
|||
return True
|
||||
|
||||
result = tree.find_longest_path(base_image.id, checker)
|
||||
self.assertEquals(4, len(result))
|
||||
self.assertEquals(5, len(result))
|
||||
self.assertEquals('prod', tree.tag_containing_image(result[-1]))
|
||||
|
||||
def test_filtering(self):
|
||||
|
@ -74,23 +74,6 @@ class TestImageTree(unittest.TestCase):
|
|||
result = tree.find_longest_path(base_image.id, checker)
|
||||
self.assertEquals(0, len(result))
|
||||
|
||||
def test_find_tag_parent_image(self):
|
||||
all_images = list(model.image.get_repository_images(NAMESPACE, COMPLEX_REPO))
|
||||
all_tags = list(model.tag.list_repository_tags(NAMESPACE, COMPLEX_REPO))
|
||||
tree = ImageTree(all_images, all_tags)
|
||||
|
||||
base_image = self._get_base_image(all_images)
|
||||
|
||||
def checker(index, image):
|
||||
return True
|
||||
|
||||
result = tree.find_longest_path(base_image.id, checker)
|
||||
self.assertEquals(4, len(result))
|
||||
|
||||
# Only use the first two images. They don't have tags, but the method should
|
||||
# still return the tag that contains them.
|
||||
self.assertEquals('staging', tree.tag_containing_image(result[0]))
|
||||
|
||||
def test_longest_path_simple_repo_direct_lookup(self):
|
||||
repository = model.repository.get_repository(NAMESPACE, SIMPLE_REPO)
|
||||
all_images = list(model.image.get_repository_images(NAMESPACE, SIMPLE_REPO))
|
||||
|
|
Reference in a new issue