From 81466467610261d92e36535a1730b2ac4b986b00 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Mon, 2 Apr 2018 14:16:43 -0400 Subject: [PATCH] Simplifying queries around images and placements Only verbs needs to load placements for multiple images, so we can vastly simplify and optimize most queries by making it two-step, and having the rest of the image loads not worry about placements --- data/model/image.py | 172 ++++++++++---------------- endpoints/api/image_models_pre_oci.py | 2 +- endpoints/v1/models_pre_oci.py | 8 +- endpoints/verbs/models_pre_oci.py | 14 +-- initdb.py | 2 +- test/test_api_usage.py | 2 +- test/test_imagetree.py | 19 +-- 7 files changed, 78 insertions(+), 141 deletions(-) diff --git a/data/model/image.py b/data/model/image.py index ad5f6d2b5..fb022ee88 100644 --- a/data/model/image.py +++ b/data/model/image.py @@ -1,9 +1,11 @@ import logging import hashlib import json + +from collections import defaultdict +from datetime import datetime import dateutil.parser -from datetime import datetime from peewee import JOIN_LEFT_OUTER, IntegrityError, fn from data.model import (DataModelException, db_transaction, _basequery, storage, @@ -31,23 +33,10 @@ def get_image_with_storage(docker_image_id, storage_uuid): return None -def get_parent_images_with_placements(namespace_name, repository_name, image_obj): - """ Returns a list of parent Image objects starting with the most recent parent - and ending with the base layer. The images in this query will include the storage and - placements. - """ - return _get_parent_images(namespace_name, repository_name, image_obj, include_placements=True) - - def get_parent_images(namespace_name, repository_name, image_obj): """ Returns a list of parent Image objects starting with the most recent parent - and ending with the base layer. The images in this query will include the storage but - not the placements. + and ending with the base layer. The images in this query will include the storage. """ - return _get_parent_images(namespace_name, repository_name, image_obj, include_placements=False) - - -def _get_parent_images(namespace_name, repository_name, image_obj, include_placements=False): parents = image_obj.ancestors # Ancestors are in the format ///...//, with each path section @@ -59,12 +48,8 @@ def _get_parent_images(namespace_name, repository_name, image_obj, include_place def filter_to_parents(query): return query.where(Image.id << parent_db_ids) - if include_placements: - parents = get_repository_images_base(namespace_name, repository_name, filter_to_parents) - else: - parents = _get_repository_images_and_storages(namespace_name, repository_name, - filter_to_parents) - + parents = _get_repository_images_and_storages(namespace_name, repository_name, + filter_to_parents) id_to_image = {unicode(image.id): image for image in parents} try: return [id_to_image[parent_id] for parent_id in reversed(parent_db_ids)] @@ -72,7 +57,47 @@ def _get_parent_images(namespace_name, repository_name, image_obj, include_place raise DataModelException('Unknown parent image') +def get_placements_for_images(images): + """ Returns the placements for the given images, as a map from image storage ID to placements. """ + if not images: + return {} + + query = (ImageStoragePlacement + .select(ImageStoragePlacement, ImageStorageLocation, ImageStorage) + .join(ImageStorageLocation) + .switch(ImageStoragePlacement) + .join(ImageStorage) + .where(ImageStorage.id << [image.storage_id for image in images])) + + placement_map = defaultdict(list) + for placement in query: + placement_map[placement.storage.id].append(placement) + + return dict(placement_map) + + +def get_image_and_placements(namespace_name, repo_name, docker_image_id): + """ Returns the repo image (with a storage object) and storage placements for the image + or (None, None) if non found. + """ + repo_image = get_repo_image_and_storage(namespace_name, repo_name, docker_image_id) + if repo_image is None: + return (None, None) + + query = (ImageStoragePlacement + .select(ImageStoragePlacement, ImageStorageLocation) + .join(ImageStorageLocation) + .switch(ImageStoragePlacement) + .join(ImageStorage) + .where(ImageStorage.id == repo_image.storage_id)) + + return repo_image, list(query) + + def get_repo_image(namespace_name, repository_name, docker_image_id): + """ Returns the repository image with the given Docker image ID or None if none. + Does not include the storage object. + """ def limit_to_image_id(query): return query.where(Image.docker_image_id == docker_image_id).limit(1) @@ -83,18 +108,10 @@ def get_repo_image(namespace_name, repository_name, docker_image_id): return None -def get_repo_image_extended(namespace_name, repository_name, docker_image_id): - def limit_to_image_id(query): - return query.where(Image.docker_image_id == docker_image_id) - - images = get_repository_images_base(namespace_name, repository_name, limit_to_image_id) - if not images: - return None - - return images[0] - - def get_repo_image_and_storage(namespace_name, repository_name, docker_image_id): + """ Returns the repository image with the given Docker image ID or None if none. + Includes the storage object. + """ def limit_to_image_id(query): return query.where(Image.docker_image_id == docker_image_id) @@ -105,6 +122,17 @@ def get_repo_image_and_storage(namespace_name, repository_name, docker_image_id) return images[0] +def get_image_by_id(namespace_name, repository_name, docker_image_id): + """ Returns the repository image with the given Docker image ID or raises if not found. + Includes the storage object. + """ + image = get_repo_image_and_storage(namespace_name, repository_name, docker_image_id) + if not image: + raise InvalidImageException('Unable to find image \'%s\' for repo \'%s/%s\'' % + (docker_image_id, namespace_name, repository_name)) + return image + + def _get_repository_images_and_storages(namespace_name, repository_name, query_modifier): query = (Image .select(Image, ImageStorage) @@ -129,47 +157,6 @@ def _get_repository_images(namespace_name, repository_name, query_modifier): return query -def get_repository_images_base(namespace_name, repository_name, query_modifier): - query = (ImageStoragePlacement - .select(ImageStoragePlacement, Image, ImageStorage, ImageStorageLocation) - .join(ImageStorageLocation) - .switch(ImageStoragePlacement) - .join(ImageStorage, JOIN_LEFT_OUTER) - .join(Image) - .join(Repository) - .join(Namespace, on=(Repository.namespace_user == Namespace.id)) - .where(Repository.name == repository_name, Namespace.username == namespace_name)) - - query = query_modifier(query) - return invert_placement_query_results(query) - - -def invert_placement_query_results(placement_query): - """ This method will take a query which returns placements, storages, and images, and have it - return images and their storages, along with the placement set on each storage. - """ - location_list = list(placement_query) - - images = {} - for location in location_list: - # Make sure we're always retrieving the same image object. - image = location.storage.image - - # Set the storage to the one we got from the location, to prevent another query - image.storage = location.storage - - if not image.id in images: - images[image.id] = image - image.storage.locations = set() - else: - image = images[image.id] - - # Add the location to the image's locations set. - image.storage.locations.add(location.location.name) - - return images.values() - - def lookup_repository_images(repo, docker_image_ids): return (Image .select(Image, ImageStorage) @@ -177,13 +164,6 @@ def lookup_repository_images(repo, docker_image_ids): .where(Image.repository == repo, Image.docker_image_id << docker_image_ids)) -def get_matching_repository_images(namespace_name, repository_name, docker_image_ids): - def modify_query(query): - return query.where(Image.docker_image_id << list(docker_image_ids)) - - return get_repository_images_base(namespace_name, repository_name, modify_query) - - def get_repository_images_without_placements(repo_obj, with_ancestor=None): query = (Image .select(Image, ImageStorage) @@ -199,15 +179,8 @@ def get_repository_images_without_placements(repo_obj, with_ancestor=None): def get_repository_images(namespace_name, repository_name): - return get_repository_images_base(namespace_name, repository_name, lambda q: q) - - -def get_image_by_id(namespace_name, repository_name, docker_image_id): - image = get_repo_image_extended(namespace_name, repository_name, docker_image_id) - if not image: - raise InvalidImageException('Unable to find image \'%s\' for repo \'%s/%s\'' % - (docker_image_id, namespace_name, repository_name)) - return image + """ Returns all the repository images in the repository. Does not include storage objects. """ + return _get_repository_images(namespace_name, repository_name, lambda q: q) def __translate_ancestry(old_ancestry, translations, repo_obj, username, preferred_location): @@ -253,8 +226,6 @@ def _find_or_link_image(existing_image, repo_obj, username, translations, prefer username, preferred_location) copied_storage = to_copy.storage - copied_storage.locations = {placement.location.name - for placement in copied_storage.imagestorageplacement_set} translated_parent_id = None if new_image_ancestry != '/': @@ -403,24 +374,6 @@ def get_repo_image_by_storage_checksum(namespace, repository_name, storage_check raise InvalidImageException(msg) -def get_image_layers(image): - """ Returns a list of the full layers of an image, including itself (if specified), sorted - from base image outward. """ - image_ids = image.ancestor_id_list() + [image.id] - - query = (ImageStoragePlacement - .select(ImageStoragePlacement, Image, ImageStorage, ImageStorageLocation) - .join(ImageStorageLocation) - .switch(ImageStoragePlacement) - .join(ImageStorage, JOIN_LEFT_OUTER) - .join(Image) - .where(Image.id << image_ids)) - - image_list = list(invert_placement_query_results(query)) - image_list.sort(key=lambda img: image_ids.index(img.id)) - return image_list - - def synthesize_v1_image(repo, image_storage_id, storage_image_size, docker_image_id, created_date_str, comment, command, v1_json_metadata, parent_image=None): """ Find an existing image with this docker image id, and if none exists, write one with the @@ -507,6 +460,7 @@ def get_image_with_storage_and_parent_base(): .join(Parent, JOIN_LEFT_OUTER, on=(Image.parent == Parent.id)) .join(ParentImageStorage, JOIN_LEFT_OUTER, on=(ParentImageStorage.id == Parent.storage))) + def set_secscan_status(image, indexed, version): query = (Image .select() diff --git a/endpoints/api/image_models_pre_oci.py b/endpoints/api/image_models_pre_oci.py index 2561672d2..e29c294b0 100644 --- a/endpoints/api/image_models_pre_oci.py +++ b/endpoints/api/image_models_pre_oci.py @@ -44,7 +44,7 @@ class PreOCIModel(ImageInterface): return [_build_image(image) for image in all_images] def get_repository_image(self, namespace_name, repo_name, docker_image_id): - image = model.image.get_repo_image_extended(namespace_name, repo_name, docker_image_id) + image = model.image.get_repo_image_and_storage(namespace_name, repo_name, docker_image_id) if not image: return None diff --git a/endpoints/v1/models_pre_oci.py b/endpoints/v1/models_pre_oci.py index 816369089..030c90545 100644 --- a/endpoints/v1/models_pre_oci.py +++ b/endpoints/v1/models_pre_oci.py @@ -11,10 +11,12 @@ class PreOCIModel(DockerRegistryV1DataInterface): """ def placement_locations_and_path_docker_v1(self, namespace_name, repo_name, image_id): - repo_image = model.image.get_repo_image_extended(namespace_name, repo_name, image_id) - if not repo_image or repo_image.storage is None: + image, placements = model.image.get_image_and_placements(namespace_name, repo_name, image_id) + if image is None: return None, None - return repo_image.storage.locations, model.storage.get_layer_path(repo_image.storage) + + locations = {placement.location.name for placement in placements} + return locations, model.storage.get_layer_path(image.storage) def docker_v1_metadata(self, namespace_name, repo_name, image_id): repo_image = model.image.get_repo_image(namespace_name, repo_name, image_id) diff --git a/endpoints/verbs/models_pre_oci.py b/endpoints/verbs/models_pre_oci.py index 26a955603..4d233bfbe 100644 --- a/endpoints/verbs/models_pre_oci.py +++ b/endpoints/verbs/models_pre_oci.py @@ -30,8 +30,9 @@ class PreOCIModel(VerbsDataInterface): repo_image_record = model.image.get_image_by_id( repo_image.repository.namespace_name, repo_image.repository.name, repo_image.image_id) - parents = model.image.get_parent_images_with_placements( + parents = model.image.get_parent_images( repo_image.repository.namespace_name, repo_image.repository.name, repo_image_record) + placements_map = model.image.get_placements_for_images(parents) yield repo_image @@ -42,9 +43,10 @@ class PreOCIModel(VerbsDataInterface): except ValueError: pass + locations = [placement.location.name for placement in placements_map[parent.storage.id]] yield ImageWithBlob( image_id=parent.docker_image_id, - blob=_blob(parent.storage), + blob=_blob(parent.storage, locations=locations), repository=repo_image.repository, compat_metadata=metadata, v1_metadata=_docker_v1_metadata(repo_image.repository.namespace_name, @@ -175,15 +177,11 @@ def _derived_image(blob_record, repo_image): internal_source_image_db_id=repo_image.internal_db_id,) -def _blob(blob_record): +def _blob(blob_record, locations=None): """ Returns a Blob object for the given Pre-OCI data model blob instance. """ - if hasattr(blob_record, 'locations'): - locations = blob_record.locations - else: - locations = model.storage.get_storage_locations(blob_record.uuid) - + locations = locations or model.storage.get_storage_locations(blob_record.uuid) return Blob( uuid=blob_record.uuid, size=blob_record.image_size, diff --git a/initdb.py b/initdb.py index 378a22e93..78b60ffc3 100644 --- a/initdb.py +++ b/initdb.py @@ -584,7 +584,7 @@ def populate_database(minimal=False, with_storage=False): 'Complex repository with many branches and tags.', False, [(new_user_2, 'read'), (dtrobot[0], 'read')], (2, [(3, [], 'v2.0'), - (1, [(1, [(1, [], ['prod'])], + (1, [(1, [(2, [], ['prod'])], 'staging'), (1, [], None)], None)], None)) diff --git a/test/test_api_usage.py b/test/test_api_usage.py index 361cd2d8b..f6de636a8 100644 --- a/test/test_api_usage.py +++ b/test/test_api_usage.py @@ -2829,7 +2829,7 @@ class TestListAndDeleteTag(ApiTestCase): repository=ADMIN_ACCESS_USER + '/complex', tag='staging')) staging_images = json['images'] - assert len(prod_images) == len(staging_images) + 1 + assert len(prod_images) == len(staging_images) + 2 # Delete prod. self.deleteEmptyResponse(RepositoryTag, diff --git a/test/test_imagetree.py b/test/test_imagetree.py index 44b0fc10c..ff62a52d7 100644 --- a/test/test_imagetree.py +++ b/test/test_imagetree.py @@ -58,7 +58,7 @@ class TestImageTree(unittest.TestCase): return True result = tree.find_longest_path(base_image.id, checker) - self.assertEquals(4, len(result)) + self.assertEquals(5, len(result)) self.assertEquals('prod', tree.tag_containing_image(result[-1])) def test_filtering(self): @@ -74,23 +74,6 @@ class TestImageTree(unittest.TestCase): result = tree.find_longest_path(base_image.id, checker) self.assertEquals(0, len(result)) - def test_find_tag_parent_image(self): - all_images = list(model.image.get_repository_images(NAMESPACE, COMPLEX_REPO)) - all_tags = list(model.tag.list_repository_tags(NAMESPACE, COMPLEX_REPO)) - tree = ImageTree(all_images, all_tags) - - base_image = self._get_base_image(all_images) - - def checker(index, image): - return True - - result = tree.find_longest_path(base_image.id, checker) - self.assertEquals(4, len(result)) - - # Only use the first two images. They don't have tags, but the method should - # still return the tag that contains them. - self.assertEquals('staging', tree.tag_containing_image(result[0])) - def test_longest_path_simple_repo_direct_lookup(self): repository = model.repository.get_repository(NAMESPACE, SIMPLE_REPO) all_images = list(model.image.get_repository_images(NAMESPACE, SIMPLE_REPO))