From 1d8b72235a92a2a56d1a7b94b006c8d9a9feccef Mon Sep 17 00:00:00 2001
From: Jake Moshenko
Date: Fri, 26 Aug 2016 14:46:18 -0400
Subject: [PATCH 1/5] Add a helper method to Image to parse ancestor string.

---
 data/database.py                         |  6 ++++++
 data/model/image.py                      |  6 ++----
 data/model/repository.py                 |  2 +-
 endpoints/api/image.py                   | 24 +++++++++++------------
 util/imagetree.py                        | 25 +++++++++++-------------
 util/migrate/backfill_aggregate_sizes.py |  2 +-
 6 files changed, 33 insertions(+), 32 deletions(-)

diff --git a/data/database.py b/data/database.py
index 367dac336..8e34d21ea 100644
--- a/data/database.py
+++ b/data/database.py
@@ -616,6 +616,12 @@ class Image(BaseModel):
     (('security_indexed_engine', 'security_indexed'), False),
   )
 
+  def ancestor_id_list(self):
+    """ Returns an integer list of ancestor ids, ordered chronologically from
+        root to direct parent.
+    """
+    return map(int, self.ancestors.split('/')[1:-1])
+
 
 _ImageProxy.initialize(Image)
 
diff --git a/data/model/image.py b/data/model/image.py
index c11e7178b..2fc8f837d 100644
--- a/data/model/image.py
+++ b/data/model/image.py
@@ -397,9 +397,7 @@ def get_repo_image_by_storage_checksum(namespace, repository_name, storage_check
 def get_image_layers(image):
   """ Returns a list of the full layers of an image, including itself (if specified), sorted
       from base image outward. """
-  ancestors = image.ancestors.split('/')[1:-1]
-  image_ids = [ancestor_id for ancestor_id in ancestors if ancestor_id]
-  image_ids.append(str(image.id))
+  image_ids = image.ancestor_id_list() + [image.id]
 
   query = (ImageStoragePlacement
            .select(ImageStoragePlacement, Image, ImageStorage, ImageStorageLocation)
@@ -410,7 +408,7 @@ def get_image_layers(image):
            .where(Image.id << image_ids))
 
   image_list = list(invert_placement_query_results(query))
-  image_list.sort(key=lambda image: image_ids.index(str(image.id)))
+  image_list.sort(key=lambda img: image_ids.index(img.id))
   return image_list
 
diff --git a/data/model/repository.py b/data/model/repository.py
index aaceb74e6..e0bf7234b 100644
--- a/data/model/repository.py
+++ b/data/model/repository.py
@@ -157,7 +157,7 @@ def garbage_collect_repo(repo):
   def gen_referenced_ancestors():
     for tagged_image in tagged_images:
       # The ancestor list is in the format '/1/2/3/', extract just the ids
-      ancestor_id_strings = tagged_image.ancestors.split('/')[1:-1]
+      ancestor_id_strings = tagged_image.ancestor_id_list()
       for img_id_str in ancestor_id_strings:
         yield int(img_id_str)
       yield tagged_image.id
diff --git a/endpoints/api/image.py b/endpoints/api/image.py
index 48875b271..0d6e59425 100644
--- a/endpoints/api/image.py
+++ b/endpoints/api/image.py
@@ -13,7 +13,7 @@ def image_view(image, image_map, include_ancestors=True):
   command = image.command
 
   def docker_id(aid):
-    if not aid or not aid in image_map:
+    if aid not in image_map:
       return ''
 
     return image_map[aid].docker_image_id
@@ -30,14 +30,14 @@ def image_view(image, image_map, include_ancestors=True):
 
   if include_ancestors:
     # Calculate the ancestors string, with the DBID's replaced with the docker IDs.
-    ancestors = [docker_id(a) for a in image.ancestors.split('/')]
-    image_data['ancestors'] = '/'.join(ancestors)
+    ancestors = [docker_id(a) for a in image.ancestor_id_list()]
+    image_data['ancestors'] = '/{0}/'.format('/'.join(ancestors))
 
   return image_data
 
 
 def historical_image_view(image, image_map):
-  ancestors = [image_map[a] for a in image.ancestors.split('/')[1:-1]]
+  ancestors = [image_map[a] for a in image.ancestor_id_list()]
   normal_view = image_view(image, image_map)
   normal_view['history'] = [image_view(parent, image_map, False) for parent in ancestors]
   return normal_view
@@ -58,23 +58,23 @@ class RepositoryImageList(RepositoryParamResource):
     all_images = model.image.get_repository_images_without_placements(repo)
     all_tags = model.tag.list_repository_tags(namespace, repository)
 
-    tags_by_image_id = defaultdict(list)
+    tags_by_docker_id = defaultdict(list)
     found_image_ids = set()
 
     for tag in all_tags:
-      tags_by_image_id[tag.image.docker_image_id].append(tag.name)
-      found_image_ids.add(str(tag.image.id))
-      found_image_ids.update(tag.image.ancestors.split('/')[1:-1])
+      tags_by_docker_id[tag.image.docker_image_id].append(tag.name)
+      found_image_ids.add(tag.image.id)
+      found_image_ids.update(tag.image.ancestor_id_list())
 
     image_map = {}
     filtered_images = []
     for image in all_images:
-      if str(image.id) in found_image_ids:
-        image_map[str(image.id)] = image
+      if image.id in found_image_ids:
+        image_map[image.id] = image
         filtered_images.append(image)
 
     def add_tags(image_json):
-      image_json['tags'] = tags_by_image_id[image_json['id']]
+      image_json['tags'] = tags_by_docker_id[image_json['id']]
       return image_json
 
     return {
@@ -98,7 +98,7 @@ class RepositoryImage(RepositoryParamResource):
     # Lookup all the ancestor images for the image.
     image_map = {}
     for current_image in model.image.get_parent_images(namespace, repository, image):
-      image_map[str(current_image.id)] = current_image
+      image_map[current_image.id] = current_image
 
     return historical_image_view(image, image_map)
 
diff --git a/util/imagetree.py b/util/imagetree.py
index a45ac29d1..9df68b3d1 100644
--- a/util/imagetree.py
+++ b/util/imagetree.py
@@ -1,3 +1,6 @@
+from collections import defaultdict
+
+
 class ImageTreeNode(object):
   """ A node in the image tree. """
   def __init__(self, image, child_map):
@@ -9,7 +12,7 @@ class ImageTreeNode(object):
 
   @property
   def children(self):
-    return self._child_map.get(str(self.image.id), [])
+    return self._child_map[self.image.id]
 
   def add_tag(self, tag):
     self.tags.append(tag)
@@ -20,18 +23,18 @@ class ImageTree(object):
 
   def __init__(self, all_images, all_tags, base_filter=None):
     self._image_map = {}
-    self._child_map = {}
+    self._child_map = defaultdict(list)
 
     self._build(all_images, all_tags, base_filter)
 
-  def _build(self, all_images, all_tags, base_filter=None):
+  def _build(self, all_images, all_tags, base_filter=None):
     # Build nodes for each of the images.
     for image in all_images:
-      ancestors = image.ancestors.split('/')[1:-1]
+      ancestors = image.ancestor_id_list()
 
       # Filter any unneeded images.
       if base_filter is not None:
-        if image.id != base_filter and not str(base_filter) in ancestors:
+        if image.id != base_filter and base_filter not in ancestors:
           continue
 
       # Create the node for the image.
       image_node = ImageTreeNode(image, self._child_map)
       self._image_map[image.id] = image_node
 
       # Add the node to the child map for its parent image (if any).
-      parent_image_id = image.ancestors.split('/')[-2] if image.ancestors else None
-      if parent_image_id:
-        if not parent_image_id in self._child_map:
-          self._child_map[parent_image_id] = []
-
+      parent_image_id = image.parent_id
+      if parent_image_id is not None:
         self._child_map[parent_image_id].append(image_node)
 
     # Build the tag map.
@@ -54,7 +54,6 @@ class ImageTree(object):
 
         image_node.add_tag(tag.name)
 
-
   def find_longest_path(self, image_id, checker):
     """ Returns a list of images representing the longest path that matches the given
         checker function, starting from the given image_id *exclusive*.
@@ -65,7 +64,6 @@ class ImageTree(object):
 
     return self._find_longest_path(start_node, checker, -1)[1:]
 
-
   def _find_longest_path(self, image_node, checker, index):
     found_path = []
@@ -79,7 +77,6 @@ class ImageTree(object):
 
     return [image_node.image] + found_path
 
-
   def tag_containing_image(self, image):
     """ Returns the name of the closest tag containing the given image. """
     if not image:
@@ -99,4 +96,4 @@ class ImageTree(object):
       if found is not None:
         return found
 
-    return None
\ No newline at end of file
+    return None
diff --git a/util/migrate/backfill_aggregate_sizes.py b/util/migrate/backfill_aggregate_sizes.py
index db6f195f2..b19314e1a 100644
--- a/util/migrate/backfill_aggregate_sizes.py
+++ b/util/migrate/backfill_aggregate_sizes.py
@@ -35,7 +35,7 @@ def backfill_aggregate_sizes():
 
       aggregate_size = image.storage.image_size
 
-      image_ids = image.ancestors.split('/')[1:-1]
+      image_ids = image.ancestor_id_list()
       for image_id in image_ids:
         to_add = db_for_update(Image
                                .select(Image, ImageStorage)

From 0815f6b6c4c2a39327970dfc420d46d29f9014b7 Mon Sep 17 00:00:00 2001
From: Jake Moshenko
Date: Fri, 26 Aug 2016 14:47:59 -0400
Subject: [PATCH 2/5] Fix indentation for DB queries.

---
 data/model/tag.py | 18 ++++++++++--------
 test/test_gc.py   | 14 ++++++++------
 2 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/data/model/tag.py b/data/model/tag.py
index cf5e0fbc8..125339697 100644
--- a/data/model/tag.py
+++ b/data/model/tag.py
@@ -192,10 +192,11 @@ def _get_repo_tag_image(tag_name, include_storage, modifier):
   query = Image.select().join(RepositoryTag)
 
   if include_storage:
-    query = (Image.select(Image, ImageStorage)
-             .join(ImageStorage)
-             .switch(Image)
-             .join(RepositoryTag))
+    query = (Image
+             .select(Image, ImageStorage)
+             .join(ImageStorage)
+             .switch(Image)
+             .join(RepositoryTag))
 
   images = _tag_alive(modifier(query.where(RepositoryTag.name == tag_name)))
   if not images:
@@ -213,10 +214,11 @@ def get_repo_tag_image(repo, tag_name, include_storage=False):
 
 def get_tag_image(namespace_name, repository_name, tag_name, include_storage=False):
   def modifier(query):
-    return (query.switch(RepositoryTag)
-            .join(Repository)
-            .join(Namespace)
-            .where(Namespace.username == namespace_name, Repository.name == repository_name))
+    return (query
+            .switch(RepositoryTag)
+            .join(Repository)
+            .join(Namespace)
+            .where(Namespace.username == namespace_name, Repository.name == repository_name))
 
   return _get_repo_tag_image(tag_name, include_storage, modifier)
 
diff --git a/test/test_gc.py b/test/test_gc.py
index e5d7cef22..8be27da4b 100644
--- a/test/test_gc.py
+++ b/test/test_gc.py
@@ -183,9 +183,10 @@ class TestGarbageCollection(unittest.TestCase):
       model.repository.purge_repository(repo.namespace_user.username, repo.name)
 
     # Change the time machine expiration on the namespace.
-    (database.User.update(removed_tag_expiration_s=1000000000)
-     .where(database.User.username == ADMIN_ACCESS_USER)
-     .execute())
+    (database.User
+     .update(removed_tag_expiration_s=1000000000)
+     .where(database.User.username == ADMIN_ACCESS_USER)
+     .execute())
 
     # Create a repository without any garbage.
     repository = self.createRepository(latest=['i1', 'i2', 'i3'])
@@ -200,9 +201,10 @@ class TestGarbageCollection(unittest.TestCase):
     self.assertIsNone(model.repository.find_repository_with_garbage(1000000000))
 
     # Change the time machine expiration on the namespace.
-    (database.User.update(removed_tag_expiration_s=0)
-     .where(database.User.username == ADMIN_ACCESS_USER)
-     .execute())
+    (database.User
+     .update(removed_tag_expiration_s=0)
+     .where(database.User.username == ADMIN_ACCESS_USER)
+     .execute())
 
     # Now we should find the repository for GC.
     repository = model.repository.find_repository_with_garbage(0)

From 584a5a7ddd10e75064584fd4b7d00f0735479de6 Mon Sep 17 00:00:00 2001
From: Jake Moshenko
Date: Fri, 26 Aug 2016 14:48:39 -0400
Subject: [PATCH 3/5] Reduce database bandwidth by tracking gc candidate images.

---
 data/database.py         |  29 +++++++++++
 data/model/repository.py | 115 ++++++++++++++++++---------------------
 data/model/tag.py        | 112 ++++++++++++++++++++++++--------------
 test/test_gc.py          |   9 ++-
 test/test_manifests.py   |   3 +-
 5 files changed, 161 insertions(+), 107 deletions(-)

diff --git a/data/database.py b/data/database.py
index 8e34d21ea..935fad2a6 100644
--- a/data/database.py
+++ b/data/database.py
@@ -34,6 +34,7 @@ _SCHEME_DRIVERS = {
   'postgresql+psycopg2': PostgresqlDatabase,
 }
 
+
 SCHEME_RANDOM_FUNCTION = {
   'mysql': fn.Rand,
   'mysql+pymysql': fn.Rand,
@@ -42,12 +43,37 @@ SCHEME_RANDOM_FUNCTION = {
   'postgresql+psycopg2': fn.Random,
 }
 
+
+def pipes_concat(arg1, arg2, *extra_args):
+  """ Concat function for sqlite, since it doesn't support fn.Concat.
+      Concatenates clauses with || characters.
+  """
+  reduced = arg1.concat(arg2)
+  for arg in extra_args:
+    reduced = reduced.concat(arg)
+  return reduced
+
+
+def function_concat(arg1, arg2, *extra_args):
+  """ Default implementation of concat which uses fn.Concat(). Used by all
+      database engines except sqlite.
+  """
+  return fn.Concat(arg1, arg2, *extra_args)
+
+
+SCHEME_SPECIALIZED_CONCAT = {
+  'sqlite': pipes_concat,
+}
+
+
 def real_for_update(query):
   return query.for_update()
 
+
 def null_for_update(query):
   return query
 
+
 def delete_instance_filtered(instance, model_class, delete_nullable, skip_transitive_deletes):
   """ Deletes the DB instance recursively, skipping any models in the skip_transitive_deletes set.
@@ -181,6 +207,7 @@ read_slave = Proxy()
 db_random_func = CallableProxy()
 db_for_update = CallableProxy()
 db_transaction = CallableProxy()
+db_concat_func = CallableProxy()
 
 
 def validate_database_url(url, db_kwargs, connect_timeout=5):
@@ -227,6 +254,8 @@ def configure(config_object):
   db_random_func.initialize(SCHEME_RANDOM_FUNCTION[parsed_write_uri.drivername])
   db_for_update.initialize(SCHEME_SPECIALIZED_FOR_UPDATE.get(parsed_write_uri.drivername,
                                                              real_for_update))
+  db_concat_func.initialize(SCHEME_SPECIALIZED_CONCAT.get(parsed_write_uri.drivername,
+                                                          function_concat))
 
   read_slave_uri = config_object.get('DB_READ_SLAVE_URI', None)
   if read_slave_uri is not None:
diff --git a/data/model/repository.py b/data/model/repository.py
index e0bf7234b..75b4b65c7 100644
--- a/data/model/repository.py
+++ b/data/model/repository.py
@@ -11,7 +11,7 @@ from data.database import (Repository, Namespace, RepositoryTag, Star, Image, Us
                            Visibility, RepositoryPermission, RepositoryActionCount,
                            Role, RepositoryAuthorizedEmail, TagManifest, DerivedStorageForImage,
                            Label, TagManifestLabel, db_for_update, get_epoch_timestamp,
-                           db_random_func)
+                           db_random_func, db_concat_func)
 
 
 logger = logging.getLogger(__name__)
@@ -43,45 +43,21 @@ def get_repository(namespace_name, repository_name):
     return None
 
 
-def _purge_all_repository_tags(namespace_name, repository_name):
-  """ Immediately purge all repository tags without respecting the lifeline procedure """
-  try:
-    repo = _basequery.get_existing_repository(namespace_name, repository_name)
-  except Repository.DoesNotExist:
-    raise DataModelException('Invalid repository \'%s/%s\'' %
-                             (namespace_name, repository_name))
-
-  # Finds all the tags to delete.
-  repo_tags = list(RepositoryTag.select().where(RepositoryTag.repository == repo.id))
-  if not repo_tags:
-    return
-
-  # Find all labels to delete.
-  manifest_labels_query = (TagManifestLabel
-                           .select()
-                           .where(TagManifestLabel.repository == repo))
-
-  label_ids = [manifest_label.label_id for manifest_label in manifest_labels_query]
-  if label_ids:
-    # Delete all the mapping entries.
-    TagManifestLabel.delete().where(TagManifestLabel.repository == repo).execute()
-
-    # Delete all the matching labels.
-    Label.delete().where(Label.id << label_ids).execute()
-
-  # Delete all the manifests.
-  TagManifest.delete().where(TagManifest.tag << repo_tags).execute()
-
-  # Delete all tags.
-  RepositoryTag.delete().where(RepositoryTag.repository == repo.id).execute()
-
-
 def purge_repository(namespace_name, repository_name):
+  repo = _basequery.get_existing_repository(namespace_name, repository_name)
+
   # Delete all tags to allow gc to reclaim storage
-  _purge_all_repository_tags(namespace_name, repository_name)
+  previously_referenced = tag.purge_all_tags(repo)
+  unreferenced_image_q = Image.select(Image.id).where(Image.repository == repo)
+
+  if len(previously_referenced) > 0:
+    unreferenced_image_q = (unreferenced_image_q
+                            .where(~(Image.id << list(previously_referenced))))
+
+  unreferenced_candidates = set(img[0] for img in unreferenced_image_q.tuples())
 
   # Gc to remove the images and storage
-  garbage_collect_repository(namespace_name, repository_name)
+  garbage_collect_repo(repo, previously_referenced | unreferenced_candidates)
 
   # Delete the rest of the repository metadata
   fetched = _basequery.get_existing_repository(namespace_name, repository_name)
   fetched.delete_instance(recursive=True, delete_nullable=False)
@@ -135,34 +111,46 @@ def find_repository_with_garbage(limit_to_gc_policy_s):
   return None
 
 
-def garbage_collect_repository(namespace_name, repository_name):
-  repo = get_repository(namespace_name, repository_name)
-  if repo is not None:
-    garbage_collect_repo(repo)
-
-
-def garbage_collect_repo(repo):
+def garbage_collect_repo(repo, extra_candidate_set=None):
   logger.debug('Garbage collecting repository %s', repo.id)
 
   storage_id_whitelist = set()
-  tag.garbage_collect_tags(repo)
+  candidate_orphan_image_set = tag.garbage_collect_tags(repo)
+
+  if extra_candidate_set:
+    candidate_orphan_image_set.update(extra_candidate_set)
+
+  if not len(candidate_orphan_image_set):
+    logger.debug('No candidate images for GC for repo: %s', repo.id)
+    return
+
+  candidates_orphans = list(candidate_orphan_image_set)
 
   with db_transaction():
-    # Get a list of all images used by tags in the repository
-    tagged_images = (Image
-                     .select(Image.id, Image.ancestors)
-                     .join(RepositoryTag)
-                     .where(Image.repository == repo))
+    Candidate = Image.alias()
+    Tagged = Image.alias()
+    ancestor_superset = Tagged.ancestors ** db_concat_func(Candidate.ancestors, Candidate.id, '/%')
 
-    def gen_referenced_ancestors():
-      for tagged_image in tagged_images:
-        # The ancestor list is in the format '/1/2/3/', extract just the ids
-        ancestor_id_strings = tagged_image.ancestor_id_list()
-        for img_id_str in ancestor_id_strings:
-          yield int(img_id_str)
-        yield tagged_image.id
+    # We are going to compute all images which are being referenced in two ways:
+    # First, we will find all candidate images whose own ancestor path appears
+    # within a tagged image's ancestry. Second, we union in all of the candidate
+    # images which are directly referenced by a tag. This can be used in a
+    # subquery to directly find which candidates are being referenced without
+    # any client side computation or extra round trips.
+    ancestor_referenced = (Candidate
+                           .select(Candidate.id)
+                           .join(Tagged, on=ancestor_superset)
+                           .join(RepositoryTag, on=(Tagged.id == RepositoryTag.image))
+                           .where(RepositoryTag.repository == repo.id,
+                                  Candidate.id << candidates_orphans))
 
-    referenced_ancestors = set(gen_referenced_ancestors())
+    direct_referenced = (Candidate
+                         .select(Candidate.id)
+                         .join(RepositoryTag)
+                         .where(RepositoryTag.repository == repo.id,
+                                Candidate.id << candidates_orphans))
+
+    referenced_candidates = (direct_referenced | ancestor_referenced)
 
     # We desire two pieces of information from the database from the following
    # query: all of the image ids which are associated with this repository,
     # and the storages which are associated with those images. Rather than
     # hydrate full ORM objects and remap them in application
     # code, which is overkill for just two fields, we use a tuple query, and
     # feed that directly to the dictionary tuple constructor which takes an
     # iterable of tuples containing [(k, v), (k, v), ...]
-    all_repo_images = Image.select(Image.id, Image.storage).where(Image.repository == repo).tuples()
-    images_to_storages = dict(all_repo_images)
-    to_remove = list(set(images_to_storages.keys()).difference(referenced_ancestors))
+    unreferenced_candidates = (Image
+                               .select(Image.id, Image.storage)
+                               .where(Image.id << candidates_orphans,
+                                      ~(Image.id << referenced_candidates))
+                               .tuples())
+
+    unreferenced_images_to_storages = dict(unreferenced_candidates)
+    to_remove = unreferenced_images_to_storages.keys()
 
     if len(to_remove) > 0:
       logger.info('Cleaning up unreferenced images: %s', to_remove)
-      storage_id_whitelist = {images_to_storages[to_remove_id] for to_remove_id in to_remove}
+      storage_id_whitelist = set(unreferenced_images_to_storages.values())
 
       # Lookup any derived images for the images to remove.
       derived = DerivedStorageForImage.select().where(
diff --git a/data/model/tag.py b/data/model/tag.py
index 125339697..03fa30ec3 100644
--- a/data/model/tag.py
+++ b/data/model/tag.py
@@ -138,54 +138,86 @@ def delete_tag(namespace_name, repository_name, tag_name):
 
 
 def garbage_collect_tags(repo):
-  expired_time = get_epoch_timestamp() - repo.namespace_user.removed_tag_expiration_s
+  """ Remove all of the tags that have gone past their garbage collection
+      expiration window, and return a set of image ids which *may* have been
+      orphaned.
+  """
+  def add_expiration_data(base_query):
+    expired_clause = get_epoch_timestamp() - Namespace.removed_tag_expiration_s
+    return (base_query
+            .switch(RepositoryTag)
+            .join(Repository)
+            .join(Namespace, on=(Repository.namespace_user == Namespace.id))
+            .where(~(RepositoryTag.lifetime_end_ts >> None),
+                   RepositoryTag.lifetime_end_ts <= expired_clause))
+  return _delete_tags(repo, add_expiration_data)
 
-  tags_to_delete = list(RepositoryTag
-                        .select(RepositoryTag.id)
-                        .where(RepositoryTag.repository == repo,
-                               ~(RepositoryTag.lifetime_end_ts >> None),
-                               (RepositoryTag.lifetime_end_ts <= expired_time))
-                        .order_by(RepositoryTag.id))
 
+def purge_all_tags(repo):
+  """ Remove all tags from the repository, and return a set of all of the
+      image ids which are now orphaned.
+  """
+  return _delete_tags(repo)
 
-  if len(tags_to_delete) > 0:
-    with db_transaction():
-      manifests_to_delete = list(TagManifest
-                                 .select(TagManifest.id)
-                                 .join(RepositoryTag)
-                                 .where(RepositoryTag.id << tags_to_delete))
 
+def _delete_tags(repo, query_modifier=None):
+  """ Garbage collect the tags for a repository and return a set of the image
+      ids which may now be orphaned.
+ """ + tags_to_delete_q = (RepositoryTag + .select(RepositoryTag.id, Image.ancestors, Image.id) + .join(Image) + .where(RepositoryTag.repository == repo)) - num_deleted_manifests = 0 - if len(manifests_to_delete) > 0: - # Find the set of IDs for all the labels to delete. - manifest_labels_query = (TagManifestLabel - .select() - .where(TagManifestLabel.repository == repo, - TagManifestLabel.annotated << manifests_to_delete)) + if query_modifier is not None: + tags_to_delete_q = query_modifier(tags_to_delete_q) - label_ids = [manifest_label.label_id for manifest_label in manifest_labels_query] - if label_ids: - # Delete all the mapping entries. - (TagManifestLabel - .delete() - .where(TagManifestLabel.repository == repo, - TagManifestLabel.annotated << manifests_to_delete) - .execute()) + tags_to_delete = list(tags_to_delete_q) - # Delete all the matching labels. - Label.delete().where(Label.id << label_ids).execute() + if len(tags_to_delete) == 0: + return set() - # Delete the tag manifests themselves. - num_deleted_manifests = (TagManifest - .delete() - .where(TagManifest.id << manifests_to_delete) - .execute()) + with db_transaction(): + manifests_to_delete = list(TagManifest + .select(TagManifest.id) + .join(RepositoryTag) + .where(RepositoryTag.id << tags_to_delete)) - num_deleted_tags = (RepositoryTag - .delete() - .where(RepositoryTag.id << tags_to_delete) - .execute()) + num_deleted_manifests = 0 + if len(manifests_to_delete) > 0: + # Find the set of IDs for all the labels to delete. + manifest_labels_query = (TagManifestLabel + .select() + .where(TagManifestLabel.repository == repo, + TagManifestLabel.annotated << manifests_to_delete)) - logger.debug('Removed %s tags with %s manifests', num_deleted_tags, num_deleted_manifests) + label_ids = [manifest_label.label_id for manifest_label in manifest_labels_query] + if label_ids: + # Delete all the mapping entries. + (TagManifestLabel + .delete() + .where(TagManifestLabel.repository == repo, + TagManifestLabel.annotated << manifests_to_delete) + .execute()) + + # Delete all the matching labels. 
+ Label.delete().where(Label.id << label_ids).execute() + + + num_deleted_manifests = (TagManifest + .delete() + .where(TagManifest.id << manifests_to_delete) + .execute()) + + num_deleted_tags = (RepositoryTag + .delete() + .where(RepositoryTag.id << tags_to_delete) + .execute()) + + logger.debug('Removed %s tags with %s manifests', num_deleted_tags, num_deleted_manifests) + + ancestors = reduce(lambda r, l: r | l, + (set(tag.image.ancestor_id_list()) for tag in tags_to_delete)) + direct_referenced = {tag.image.id for tag in tags_to_delete} + return ancestors | direct_referenced def _get_repo_tag_image(tag_name, include_storage, modifier): diff --git a/test/test_gc.py b/test/test_gc.py index 8be27da4b..b313f3024 100644 --- a/test/test_gc.py +++ b/test/test_gc.py @@ -144,18 +144,17 @@ class TestGarbageCollection(unittest.TestCase): return repo def gcNow(self, repository): - model.repository.garbage_collect_repository(repository.namespace_user.username, repository.name) + model.repository.garbage_collect_repo(repository) def deleteTag(self, repository, tag, perform_gc=True): model.tag.delete_tag(repository.namespace_user.username, repository.name, tag) if perform_gc: - model.repository.garbage_collect_repository(repository.namespace_user.username, - repository.name) + model.repository.garbage_collect_repo(repository) def moveTag(self, repository, tag, docker_image_id): model.tag.create_or_update_tag(repository.namespace_user.username, repository.name, tag, docker_image_id) - model.repository.garbage_collect_repository(repository.namespace_user.username, repository.name) + model.repository.garbage_collect_repo(repository) def assertNotDeleted(self, repository, *args): for docker_image_id in args: @@ -212,7 +211,7 @@ class TestGarbageCollection(unittest.TestCase): self.assertEquals(REPO, repository.name) # GC the repository. - model.repository.garbage_collect_repository(repository.namespace_user.username, repository.name) + model.repository.garbage_collect_repo(repository) # There should now be no repositories with garbage. self.assertIsNone(model.repository.find_repository_with_garbage(0)) diff --git a/test/test_manifests.py b/test/test_manifests.py index d8a4574f9..03f2ff539 100644 --- a/test/test_manifests.py +++ b/test/test_manifests.py @@ -36,7 +36,8 @@ class TestManifests(unittest.TestCase): def _perform_cleanup(self): database.RepositoryTag.delete().where(database.RepositoryTag.hidden == True).execute() - model.repository.garbage_collect_repository(ADMIN_ACCESS_USER, REPO) + repo_object = model.repository.get_repository(ADMIN_ACCESS_USER, REPO) + model.repository.garbage_collect_repo(repo_object) def test_missing_link(self): """ Tests for a corner case that could result in missing a link to a blob referenced by a From cf83c9a16a60c992ea9f237cf50ecc9a02298671 Mon Sep 17 00:00:00 2001 From: Jake Moshenko Date: Wed, 31 Aug 2016 11:42:31 -0400 Subject: [PATCH 4/5] Improve the garbage collection tests. --- data/model/repository.py | 29 +++- test/test_gc.py | 290 ++++++++++++++++----------------------- 2 files changed, 147 insertions(+), 172 deletions(-) diff --git a/data/model/repository.py b/data/model/repository.py index 75b4b65c7..a1b8196e1 100644 --- a/data/model/repository.py +++ b/data/model/repository.py @@ -44,6 +44,12 @@ def get_repository(namespace_name, repository_name): def purge_repository(namespace_name, repository_name): + """ Completely delete all traces of the repository. Will return True upon + complete success, and False upon partial or total failure. 
Garbage + collection is incremental and repeatable, so this return value does + not need to be checked or responded to. + """ + repo = _basequery.get_existing_repository(namespace_name, repository_name) # Delete all tags to allow gc to reclaim storage @@ -57,12 +63,18 @@ def purge_repository(namespace_name, repository_name): unreferenced_candidates = set(img[0] for img in unreferenced_image_q.tuples()) # Gc to remove the images and storage - garbage_collect_repo(repo, previously_referenced | unreferenced_candidates) + all_repo_images = previously_referenced | unreferenced_candidates + successful_gc = garbage_collect_repo(repo, all_repo_images) + + if not successful_gc: + return False # Delete the rest of the repository metadata fetched = _basequery.get_existing_repository(namespace_name, repository_name) fetched.delete_instance(recursive=True, delete_nullable=False) + return True + @ttl_cache(maxsize=1, ttl=600) def _get_gc_expiration_policies(): @@ -112,6 +124,13 @@ def find_repository_with_garbage(limit_to_gc_policy_s): def garbage_collect_repo(repo, extra_candidate_set=None): + """ Garbage collect the specified repository object. This will remove all + images, derived images, and other associated metadata, for images which + are no longer referenced by a tag or another image which is itself + tagged. Returns True if garbage collection was completed without error + and False otherwise. Retries are safe and work incrementally, so this + return value does not need to be checked or handled. + """ logger.debug('Garbage collecting repository %s', repo.id) storage_id_whitelist = set() @@ -122,7 +141,7 @@ def garbage_collect_repo(repo, extra_candidate_set=None): if not len(candidate_orphan_image_set): logger.debug('No candidate images for GC for repo: %s', repo.id) - return + return True candidates_orphans = list(candidate_orphan_image_set) @@ -190,18 +209,20 @@ def garbage_collect_repo(repo, extra_candidate_set=None): .execute()) except IntegrityError: logger.info('Could not GC derived images %s; will try again soon', to_remove) - return + return False try: Image.delete().where(Image.id << to_remove).execute() except IntegrityError: logger.info('Could not GC images %s; will try again soon', to_remove) - return + return False if len(to_remove) > 0: logger.info('Garbage collecting storage for images: %s', to_remove) storage.garbage_collect_storage(storage_id_whitelist) + return True + def star_repository(user, repository): """ Stars a repository. """ diff --git a/test/test_gc.py b/test/test_gc.py index b313f3024..c366a5a83 100644 --- a/test/test_gc.py +++ b/test/test_gc.py @@ -1,13 +1,13 @@ import unittest import time +from contextlib import contextmanager from playhouse.test_utils import assert_query_count from app import app, storage from initdb import setup_database_for_testing, finished_database_for_testing from data import model, database from data.database import Image, ImageStorage, DerivedStorageForImage, Label, TagManifestLabel -from endpoints.v2.manifest import _generate_and_store_manifest ADMIN_ACCESS_USER = 'devtable' @@ -16,48 +16,6 @@ PUBLIC_USER = 'public' REPO = 'somerepo' -class assert_no_new_dangling_labels(object): - """ Specialized assertion for ensuring that GC cleans up all labels. 
- """ - def __init__(self): - self.existing_count = 0 - - def _get_dangling_count(self): - label_ids = set([current.id for current in Label.select()]) - referenced_by_manifest = set([mlabel.label_id for mlabel in TagManifestLabel.select()]) - return len(label_ids - referenced_by_manifest) - - def __enter__(self): - self.existing_count = self._get_dangling_count() - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - updated_count = self._get_dangling_count() - assert updated_count == self.existing_count - - -class assert_no_new_dangling_storages(object): - """ Specialized assertion for ensuring that GC cleans up all dangling storages. - """ - def __init__(self): - self.existing_count = 0 - - def _get_dangling_count(self): - storage_ids = set([current.id for current in ImageStorage.select()]) - referneced_by_image = set([image.storage_id for image in Image.select()]) - referenced_by_derived = set([derived.derivative_id for derived in DerivedStorageForImage.select()]) - - return len(storage_ids - referneced_by_image - referenced_by_derived) - - def __enter__(self): - self.existing_count = self._get_dangling_count() - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - updated_count = self._get_dangling_count() - assert updated_count == self.existing_count - - class TestGarbageCollection(unittest.TestCase): @staticmethod def _set_tag_expiration_policy(namespace, expiration_s): @@ -78,7 +36,8 @@ class TestGarbageCollection(unittest.TestCase): finished_database_for_testing(self) self.ctx.__exit__(True, None, None) - def createImage(self, docker_image_id, repository_obj, username): + @staticmethod + def createImage(docker_image_id, repository_obj, username): preferred = storage.preferred_locations[0] image = model.image.find_create_or_link_image(docker_image_id, repository_obj, username, {}, preferred) @@ -91,10 +50,9 @@ class TestGarbageCollection(unittest.TestCase): # Add some torrent info. try: + database.TorrentInfo.get(storage=image.storage) + except database.TorrentInfo.DoesNotExist: model.storage.save_torrent_info(image.storage, 1, 'helloworld') - model.storage.save_torrent_info(image.storage, 2, 'helloworlds!') - except: - pass # Add some additional placements to the image. 
for location_name in ['local_eu']: @@ -144,17 +102,17 @@ class TestGarbageCollection(unittest.TestCase): return repo def gcNow(self, repository): - model.repository.garbage_collect_repo(repository) + self.assertTrue(model.repository.garbage_collect_repo(repository)) def deleteTag(self, repository, tag, perform_gc=True): model.tag.delete_tag(repository.namespace_user.username, repository.name, tag) if perform_gc: - model.repository.garbage_collect_repo(repository) + self.assertTrue(model.repository.garbage_collect_repo(repository)) def moveTag(self, repository, tag, docker_image_id): model.tag.create_or_update_tag(repository.namespace_user.username, repository.name, tag, - docker_image_id) - model.repository.garbage_collect_repo(repository) + docker_image_id) + self.assertTrue(model.repository.garbage_collect_repo(repository)) def assertNotDeleted(self, repository, *args): for docker_image_id in args: @@ -172,14 +130,43 @@ class TestGarbageCollection(unittest.TestCase): self.fail('Expected image %s to be deleted' % docker_image_id) + @staticmethod + def _get_dangling_storage_count(): + storage_ids = set([current.id for current in ImageStorage.select()]) + referenced_by_image = set([image.storage_id for image in Image.select()]) + referenced_by_derived = set([derived.derivative_id + for derived in DerivedStorageForImage.select()]) + + return len(storage_ids - referenced_by_image - referenced_by_derived) + + @staticmethod + def _get_dangling_label_count(): + label_ids = set([current.id for current in Label.select()]) + referenced_by_manifest = set([mlabel.label_id for mlabel in TagManifestLabel.select()]) + return len(label_ids - referenced_by_manifest) + + @contextmanager + def assert_no_new_dangling_storages_or_labels(self): + """ Specialized assertion for ensuring that GC cleans up all dangling storages + and labels. + """ + # TODO: Consider also asserting the number of DB queries being performed. + existing_storage_count = self._get_dangling_storage_count() + existing_label_count = self._get_dangling_label_count() + yield + updated_storage_count = self._get_dangling_storage_count() + self.assertEqual(updated_storage_count, existing_storage_count) + + updated_label_count = self._get_dangling_label_count() + self.assertEqual(updated_label_count, existing_label_count) def test_has_garbage(self): """ Remove all existing repositories, then add one without garbage, check, then add one with garbage, and check again. """ # Delete all existing repos. - for repo in database.Repository.select(): - model.repository.purge_repository(repo.namespace_user.username, repo.name) + for repo in database.Repository.select().order_by(database.Repository.id): + self.assertTrue(model.repository.purge_repository(repo.namespace_user.username, repo.name)) # Change the time machine expiration on the namespace. (database.User @@ -211,191 +198,158 @@ class TestGarbageCollection(unittest.TestCase): self.assertEquals(REPO, repository.name) # GC the repository. - model.repository.garbage_collect_repo(repository) + self.assertTrue(model.repository.garbage_collect_repo(repository)) # There should now be no repositories with garbage. 
self.assertIsNone(model.repository.find_repository_with_garbage(0)) - def test_find_garbage_policy_functions(self): with assert_query_count(1): one_policy = model.repository.get_random_gc_policy() all_policies = model.repository._get_gc_expiration_policies() self.assertIn(one_policy, all_policies) - def test_one_tag(self): """ Create a repository with a single tag, then remove that tag and verify that the repository is now empty. """ - with assert_no_new_dangling_labels(): - with assert_no_new_dangling_storages(): - repository = self.createRepository(latest=['i1', 'i2', 'i3']) - self.deleteTag(repository, 'latest') - self.assertDeleted(repository, 'i1', 'i2', 'i3') - + with self.assert_no_new_dangling_storages_or_labels(): + repository = self.createRepository(latest=['i1', 'i2', 'i3']) + self.deleteTag(repository, 'latest') + self.assertDeleted(repository, 'i1', 'i2', 'i3') def test_two_tags_unshared_images(self): """ Repository has two tags with no shared images between them. """ - with assert_no_new_dangling_labels(): - with assert_no_new_dangling_storages(): - repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['f1', 'f2']) - self.deleteTag(repository, 'latest') - self.assertDeleted(repository, 'i1', 'i2', 'i3') - self.assertNotDeleted(repository, 'f1', 'f2') - + with self.assert_no_new_dangling_storages_or_labels(): + repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['f1', 'f2']) + self.deleteTag(repository, 'latest') + self.assertDeleted(repository, 'i1', 'i2', 'i3') + self.assertNotDeleted(repository, 'f1', 'f2') def test_two_tags_shared_images(self): """ Repository has two tags with shared images. Deleting the tag should only remove the unshared images. """ - with assert_no_new_dangling_labels(): - with assert_no_new_dangling_storages(): - repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1']) - self.deleteTag(repository, 'latest') - self.assertDeleted(repository, 'i2', 'i3') - self.assertNotDeleted(repository, 'i1', 'f1') - + with self.assert_no_new_dangling_storages_or_labels(): + repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1']) + self.deleteTag(repository, 'latest') + self.assertDeleted(repository, 'i2', 'i3') + self.assertNotDeleted(repository, 'i1', 'f1') def test_unrelated_repositories(self): """ Two repositories with different images. Removing the tag from one leaves the other's images intact. """ - with assert_no_new_dangling_labels(): - with assert_no_new_dangling_storages(): - repository1 = self.createRepository(latest=['i1', 'i2', 'i3'], name='repo1') - repository2 = self.createRepository(latest=['j1', 'j2', 'j3'], name='repo2') + with self.assert_no_new_dangling_storages_or_labels(): + repository1 = self.createRepository(latest=['i1', 'i2', 'i3'], name='repo1') + repository2 = self.createRepository(latest=['j1', 'j2', 'j3'], name='repo2') - self.deleteTag(repository1, 'latest') - - self.assertDeleted(repository1, 'i1', 'i2', 'i3') - self.assertNotDeleted(repository2, 'j1', 'j2', 'j3') + self.deleteTag(repository1, 'latest') + self.assertDeleted(repository1, 'i1', 'i2', 'i3') + self.assertNotDeleted(repository2, 'j1', 'j2', 'j3') def test_related_repositories(self): """ Two repositories with shared images. Removing the tag from one leaves the other's images intact. 
""" - with assert_no_new_dangling_labels(): - with assert_no_new_dangling_storages(): - repository1 = self.createRepository(latest=['i1', 'i2', 'i3'], name='repo1') - repository2 = self.createRepository(latest=['i1', 'i2', 'j1'], name='repo2') + with self.assert_no_new_dangling_storages_or_labels(): + repository1 = self.createRepository(latest=['i1', 'i2', 'i3'], name='repo1') + repository2 = self.createRepository(latest=['i1', 'i2', 'j1'], name='repo2') - self.deleteTag(repository1, 'latest') - - self.assertDeleted(repository1, 'i3') - self.assertNotDeleted(repository2, 'i1', 'i2', 'j1') + self.deleteTag(repository1, 'latest') + self.assertDeleted(repository1, 'i3') + self.assertNotDeleted(repository2, 'i1', 'i2', 'j1') def test_inaccessible_repositories(self): """ Two repositories under different namespaces should result in the images being deleted but not completely removed from the database. """ - with assert_no_new_dangling_labels(): - with assert_no_new_dangling_storages(): - repository1 = self.createRepository(namespace=ADMIN_ACCESS_USER, latest=['i1', 'i2', 'i3']) - repository2 = self.createRepository(namespace=PUBLIC_USER, latest=['i1', 'i2', 'i3']) - - self.deleteTag(repository1, 'latest') - self.assertDeleted(repository1, 'i1', 'i2', 'i3') - self.assertNotDeleted(repository2, 'i1', 'i2', 'i3') + with self.assert_no_new_dangling_storages_or_labels(): + repository1 = self.createRepository(namespace=ADMIN_ACCESS_USER, latest=['i1', 'i2', 'i3']) + repository2 = self.createRepository(namespace=PUBLIC_USER, latest=['i1', 'i2', 'i3']) + self.deleteTag(repository1, 'latest') + self.assertDeleted(repository1, 'i1', 'i2', 'i3') + self.assertNotDeleted(repository2, 'i1', 'i2', 'i3') def test_multiple_shared_images(self): """ Repository has multiple tags with shared images. Selectively deleting the tags, and verifying at each step. """ - with assert_no_new_dangling_labels(): - with assert_no_new_dangling_storages(): - repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'], - third=['t1', 't2', 't3'], fourth=['i1', 'f1']) + with self.assert_no_new_dangling_storages_or_labels(): + repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'], + third=['t1', 't2', 't3'], fourth=['i1', 'f1']) - # Delete tag other. Should delete f2, since it is not shared. - self.deleteTag(repository, 'other') - self.assertDeleted(repository, 'f2') - self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3', 'f1') + # Delete tag other. Should delete f2, since it is not shared. + self.deleteTag(repository, 'other') + self.assertDeleted(repository, 'f2') + self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3', 'f1') - # Move tag fourth to i3. This should remove f1 since it is no longer referenced. - self.moveTag(repository, 'fourth', 'i3') - self.assertDeleted(repository, 'f1') - self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3') + # Move tag fourth to i3. This should remove f1 since it is no longer referenced. + self.moveTag(repository, 'fourth', 'i3') + self.assertDeleted(repository, 'f1') + self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3') - # Delete tag 'latest'. This should do nothing since fourth is on the same branch. - self.deleteTag(repository, 'latest') - self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3') + # Delete tag 'latest'. This should do nothing since fourth is on the same branch. 
+ self.deleteTag(repository, 'latest') + self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3') - # Delete tag 'third'. This should remove t1->t3. - self.deleteTag(repository, 'third') - self.assertDeleted(repository, 't1', 't2', 't3') - self.assertNotDeleted(repository, 'i1', 'i2', 'i3') + # Delete tag 'third'. This should remove t1->t3. + self.deleteTag(repository, 'third') + self.assertDeleted(repository, 't1', 't2', 't3') + self.assertNotDeleted(repository, 'i1', 'i2', 'i3') - # Add tag to i1. - self.moveTag(repository, 'newtag', 'i1') - self.assertNotDeleted(repository, 'i1', 'i2', 'i3') + # Add tag to i1. + self.moveTag(repository, 'newtag', 'i1') + self.assertNotDeleted(repository, 'i1', 'i2', 'i3') - # Delete tag 'fourth'. This should remove i2 and i3. - self.deleteTag(repository, 'fourth') - self.assertDeleted(repository, 'i2', 'i3') - self.assertNotDeleted(repository, 'i1') - - # Delete tag 'newtag'. This should remove the remaining image. - self.deleteTag(repository, 'newtag') - self.assertDeleted(repository, 'i1') + # Delete tag 'fourth'. This should remove i2 and i3. + self.deleteTag(repository, 'fourth') + self.assertDeleted(repository, 'i2', 'i3') + self.assertNotDeleted(repository, 'i1') + # Delete tag 'newtag'. This should remove the remaining image. + self.deleteTag(repository, 'newtag') + self.assertDeleted(repository, 'i1') def test_empty_gc(self): - with assert_no_new_dangling_labels(): - with assert_no_new_dangling_storages(): - repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'], - third=['t1', 't2', 't3'], fourth=['i1', 'f1']) - - self.gcNow(repository) - self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3', 'f1', 'f2') + with self.assert_no_new_dangling_storages_or_labels(): + repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'], + third=['t1', 't2', 't3'], fourth=['i1', 'f1']) + self.gcNow(repository) + self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3', 'f1', 'f2') def test_time_machine_no_gc(self): """ Repository has two tags with shared images. Deleting the tag should not remove any images """ - with assert_no_new_dangling_labels(): - with assert_no_new_dangling_storages(): - repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1']) - self._set_tag_expiration_policy(repository.namespace_user.username, 60*60*24) - - self.deleteTag(repository, 'latest') - self.assertNotDeleted(repository, 'i2', 'i3') - self.assertNotDeleted(repository, 'i1', 'f1') + with self.assert_no_new_dangling_storages_or_labels(): + repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1']) + self._set_tag_expiration_policy(repository.namespace_user.username, 60*60*24) + self.deleteTag(repository, 'latest') + self.assertNotDeleted(repository, 'i2', 'i3') + self.assertNotDeleted(repository, 'i1', 'f1') def test_time_machine_gc(self): """ Repository has two tags with shared images. Deleting the second tag should cause the images for the first deleted tag to gc. 
""" - with assert_no_new_dangling_labels(): - with assert_no_new_dangling_storages(): - repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1']) + with self.assert_no_new_dangling_storages_or_labels(): + repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1']) - self._set_tag_expiration_policy(repository.namespace_user.username, 1) + self._set_tag_expiration_policy(repository.namespace_user.username, 1) - self.deleteTag(repository, 'latest') - self.assertNotDeleted(repository, 'i2', 'i3') - self.assertNotDeleted(repository, 'i1', 'f1') + self.deleteTag(repository, 'latest') + self.assertNotDeleted(repository, 'i2', 'i3') + self.assertNotDeleted(repository, 'i1', 'f1') - time.sleep(2) + time.sleep(2) - self.deleteTag(repository, 'other') # This will cause the images associated with latest to gc - self.assertDeleted(repository, 'i2', 'i3') - self.assertNotDeleted(repository, 'i1', 'f1') - - - def test_manifest_gc(self): - with assert_no_new_dangling_labels(): - with assert_no_new_dangling_storages(): - repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1']) - _generate_and_store_manifest(ADMIN_ACCESS_USER, REPO, 'latest') - - self._set_tag_expiration_policy(repository.namespace_user.username, 0) - - self.deleteTag(repository, 'latest') - self.assertDeleted(repository, 'i2', 'i3') + self.deleteTag(repository, 'other') # This will cause the images associated with latest to gc + self.assertDeleted(repository, 'i2', 'i3') + self.assertNotDeleted(repository, 'i1', 'f1') if __name__ == '__main__': From d56f570d3b78924e52f719337ddfd15848189761 Mon Sep 17 00:00:00 2001 From: Jake Moshenko Date: Thu, 1 Sep 2016 10:49:14 -0400 Subject: [PATCH 5/5] Improve the imagetree test. --- test/test_imagetree.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/test/test_imagetree.py b/test/test_imagetree.py index 5189d9d7d..44b0fc10c 100644 --- a/test/test_imagetree.py +++ b/test/test_imagetree.py @@ -20,7 +20,8 @@ class TestImageTree(unittest.TestCase): finished_database_for_testing(self) self.ctx.__exit__(True, None, None) - def _get_base_image(self, all_images): + @staticmethod + def _get_base_image(all_images): for image in all_images: if image.ancestors == '/': return image @@ -90,29 +91,27 @@ class TestImageTree(unittest.TestCase): # still return the tag that contains them. self.assertEquals('staging', tree.tag_containing_image(result[0])) - def test_longest_path_simple_repo_direct_lookup(self): repository = model.repository.get_repository(NAMESPACE, SIMPLE_REPO) all_images = list(model.image.get_repository_images(NAMESPACE, SIMPLE_REPO)) all_tags = list(model.tag.list_repository_tags(NAMESPACE, SIMPLE_REPO)) base_image = self._get_base_image(all_images) - tag_image = all_tags[0].image def checker(index, image): return True - filtered_images = model.image.get_repository_images_without_placements(repository, - with_ancestor=base_image) + filtered_images = model.image.get_repository_images_without_placements( + repository, + with_ancestor=base_image) self.assertEquals(set([f.id for f in filtered_images]), set([a.id for a in all_images])) tree = ImageTree(filtered_images, all_tags) - ancestors = tag_image.ancestors.split('/')[2:-1] # Skip the first image. result = tree.find_longest_path(base_image.id, checker) self.assertEquals(3, len(result)) self.assertEquals('latest', tree.tag_containing_image(result[-1])) + if __name__ == '__main__': unittest.main() -