Merge pull request #1772 from jakedt/alternategc

Alternategc
This commit is contained in:
Jake Moshenko 2016-09-07 13:41:30 -04:00 committed by GitHub
commit 4d11e9b4f5
10 changed files with 358 additions and 325 deletions

View file

@ -34,6 +34,7 @@ _SCHEME_DRIVERS = {
'postgresql+psycopg2': PostgresqlDatabase, 'postgresql+psycopg2': PostgresqlDatabase,
} }
SCHEME_RANDOM_FUNCTION = { SCHEME_RANDOM_FUNCTION = {
'mysql': fn.Rand, 'mysql': fn.Rand,
'mysql+pymysql': fn.Rand, 'mysql+pymysql': fn.Rand,
@ -42,12 +43,37 @@ SCHEME_RANDOM_FUNCTION = {
'postgresql+psycopg2': fn.Random, 'postgresql+psycopg2': fn.Random,
} }
def pipes_concat(arg1, arg2, *extra_args):
  """ sqlite-specific concat: chains the given clauses together with the ||
      operator, because sqlite has no CONCAT() function for fn.Concat to
      map onto. Returns the combined clause.
  """
  combined = arg1.concat(arg2)
  for clause in extra_args:
    combined = combined.concat(clause)
  return combined
def function_concat(arg1, arg2, *extra_args):
  """ Default concat implementation which delegates to the engine's native
      CONCAT() via fn.Concat. Used by every database engine except sqlite.
  """
  all_args = (arg1, arg2) + extra_args
  return fn.Concat(*all_args)
# Maps a driver scheme to a replacement concat implementation for engines
# whose SQL dialect lacks CONCAT(). Drivers absent from this map fall back
# to function_concat (fn.Concat) when db_concat_func is initialized.
SCHEME_SPECIALIZED_CONCAT = {
  'sqlite': pipes_concat,
}
def real_for_update(query): def real_for_update(query):
return query.for_update() return query.for_update()
def null_for_update(query): def null_for_update(query):
return query return query
def delete_instance_filtered(instance, model_class, delete_nullable, skip_transitive_deletes): def delete_instance_filtered(instance, model_class, delete_nullable, skip_transitive_deletes):
""" Deletes the DB instance recursively, skipping any models in the skip_transitive_deletes set. """ Deletes the DB instance recursively, skipping any models in the skip_transitive_deletes set.
@ -181,6 +207,7 @@ read_slave = Proxy()
db_random_func = CallableProxy() db_random_func = CallableProxy()
db_for_update = CallableProxy() db_for_update = CallableProxy()
db_transaction = CallableProxy() db_transaction = CallableProxy()
db_concat_func = CallableProxy()
def validate_database_url(url, db_kwargs, connect_timeout=5): def validate_database_url(url, db_kwargs, connect_timeout=5):
@ -227,6 +254,8 @@ def configure(config_object):
db_random_func.initialize(SCHEME_RANDOM_FUNCTION[parsed_write_uri.drivername]) db_random_func.initialize(SCHEME_RANDOM_FUNCTION[parsed_write_uri.drivername])
db_for_update.initialize(SCHEME_SPECIALIZED_FOR_UPDATE.get(parsed_write_uri.drivername, db_for_update.initialize(SCHEME_SPECIALIZED_FOR_UPDATE.get(parsed_write_uri.drivername,
real_for_update)) real_for_update))
db_concat_func.initialize(SCHEME_SPECIALIZED_CONCAT.get(parsed_write_uri.drivername,
function_concat))
read_slave_uri = config_object.get('DB_READ_SLAVE_URI', None) read_slave_uri = config_object.get('DB_READ_SLAVE_URI', None)
if read_slave_uri is not None: if read_slave_uri is not None:
@ -616,6 +645,12 @@ class Image(BaseModel):
(('security_indexed_engine', 'security_indexed'), False), (('security_indexed_engine', 'security_indexed'), False),
) )
def ancestor_id_list(self):
  """ Returns an integer list of ancestor ids, ordered chronologically from
      root to direct parent. Parses the '/1/2/3/'-formatted ancestors
      string, whose leading and trailing slashes produce empty edge entries
      that are sliced away.
  """
  return [int(ancestor_id) for ancestor_id in self.ancestors.split('/')[1:-1]]
_ImageProxy.initialize(Image) _ImageProxy.initialize(Image)

View file

@ -397,9 +397,7 @@ def get_repo_image_by_storage_checksum(namespace, repository_name, storage_check
def get_image_layers(image): def get_image_layers(image):
""" Returns a list of the full layers of an image, including itself (if specified), sorted """ Returns a list of the full layers of an image, including itself (if specified), sorted
from base image outward. """ from base image outward. """
ancestors = image.ancestors.split('/')[1:-1] image_ids = image.ancestor_id_list() + [image.id]
image_ids = [ancestor_id for ancestor_id in ancestors if ancestor_id]
image_ids.append(str(image.id))
query = (ImageStoragePlacement query = (ImageStoragePlacement
.select(ImageStoragePlacement, Image, ImageStorage, ImageStorageLocation) .select(ImageStoragePlacement, Image, ImageStorage, ImageStorageLocation)
@ -410,7 +408,7 @@ def get_image_layers(image):
.where(Image.id << image_ids)) .where(Image.id << image_ids))
image_list = list(invert_placement_query_results(query)) image_list = list(invert_placement_query_results(query))
image_list.sort(key=lambda image: image_ids.index(str(image.id))) image_list.sort(key=lambda img: image_ids.index(img.id))
return image_list return image_list

View file

@ -11,7 +11,7 @@ from data.database import (Repository, Namespace, RepositoryTag, Star, Image, Us
Visibility, RepositoryPermission, RepositoryActionCount, Visibility, RepositoryPermission, RepositoryActionCount,
Role, RepositoryAuthorizedEmail, TagManifest, DerivedStorageForImage, Role, RepositoryAuthorizedEmail, TagManifest, DerivedStorageForImage,
Label, TagManifestLabel, db_for_update, get_epoch_timestamp, Label, TagManifestLabel, db_for_update, get_epoch_timestamp,
db_random_func) db_random_func, db_concat_func)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -43,50 +43,38 @@ def get_repository(namespace_name, repository_name):
return None return None
def _purge_all_repository_tags(namespace_name, repository_name):
  """ Immediately purge all repository tags without respecting the lifeline procedure.

      Deletes, in dependency order: label mappings, labels, tag manifests,
      and finally the tags themselves, so no foreign-key reference survives
      its target.

      Raises DataModelException if the repository does not exist.
  """
  try:
    repo = _basequery.get_existing_repository(namespace_name, repository_name)
  except Repository.DoesNotExist:
    raise DataModelException('Invalid repository \'%s/%s\'' %
                             (namespace_name, repository_name))

  # Finds all the tags to delete. Materialized as a list so the later
  # `TagManifest.tag << repo_tags` clause has a concrete id set.
  repo_tags = list(RepositoryTag.select().where(RepositoryTag.repository == repo.id))
  if not repo_tags:
    return

  # Find all labels to delete (every label mapped to this repository's
  # manifests via TagManifestLabel).
  manifest_labels_query = (TagManifestLabel
                           .select()
                           .where(TagManifestLabel.repository == repo))

  label_ids = [manifest_label.label_id for manifest_label in manifest_labels_query]
  if label_ids:
    # Delete all the mapping entries first so the labels become unreferenced.
    TagManifestLabel.delete().where(TagManifestLabel.repository == repo).execute()

    # Delete all the matching labels.
    Label.delete().where(Label.id << label_ids).execute()

  # Delete all the manifests.
  TagManifest.delete().where(TagManifest.tag << repo_tags).execute()

  # Delete all tags.
  RepositoryTag.delete().where(RepositoryTag.repository == repo.id).execute()
def purge_repository(namespace_name, repository_name): def purge_repository(namespace_name, repository_name):
""" Completely delete all traces of the repository. Will return True upon
complete success, and False upon partial or total failure. Garbage
collection is incremental and repeatable, so this return value does
not need to be checked or responded to.
"""
repo = _basequery.get_existing_repository(namespace_name, repository_name)
# Delete all tags to allow gc to reclaim storage # Delete all tags to allow gc to reclaim storage
_purge_all_repository_tags(namespace_name, repository_name) previously_referenced = tag.purge_all_tags(repo)
unreferenced_image_q = Image.select(Image.id).where(Image.repository == repo)
if len(previously_referenced) > 0:
unreferenced_image_q = (unreferenced_image_q
.where(~(Image.id << list(previously_referenced))))
unreferenced_candidates = set(img[0] for img in unreferenced_image_q.tuples())
# Gc to remove the images and storage # Gc to remove the images and storage
garbage_collect_repository(namespace_name, repository_name) all_repo_images = previously_referenced | unreferenced_candidates
successful_gc = garbage_collect_repo(repo, all_repo_images)
if not successful_gc:
return False
# Delete the rest of the repository metadata # Delete the rest of the repository metadata
fetched = _basequery.get_existing_repository(namespace_name, repository_name) fetched = _basequery.get_existing_repository(namespace_name, repository_name)
fetched.delete_instance(recursive=True, delete_nullable=False) fetched.delete_instance(recursive=True, delete_nullable=False)
return True
@ttl_cache(maxsize=1, ttl=600) @ttl_cache(maxsize=1, ttl=600)
def _get_gc_expiration_policies(): def _get_gc_expiration_policies():
@ -135,34 +123,53 @@ def find_repository_with_garbage(limit_to_gc_policy_s):
return None return None
def garbage_collect_repository(namespace_name, repository_name): def garbage_collect_repo(repo, extra_candidate_set=None):
repo = get_repository(namespace_name, repository_name) """ Garbage collect the specified repository object. This will remove all
if repo is not None: images, derived images, and other associated metadata, for images which
garbage_collect_repo(repo) are no longer referenced by a tag or another image which is itself
tagged. Returns True if garbage collection was completed without error
and False otherwise. Retries are safe and work incrementally, so this
def garbage_collect_repo(repo): return value does not need to be checked or handled.
"""
logger.debug('Garbage collecting repository %s', repo.id) logger.debug('Garbage collecting repository %s', repo.id)
storage_id_whitelist = set() storage_id_whitelist = set()
tag.garbage_collect_tags(repo) candidate_orphan_image_set = tag.garbage_collect_tags(repo)
if extra_candidate_set:
candidate_orphan_image_set.update(extra_candidate_set)
if not len(candidate_orphan_image_set):
logger.debug('No candidate images for GC for repo: %s', repo.id)
return True
candidates_orphans = list(candidate_orphan_image_set)
with db_transaction(): with db_transaction():
# Get a list of all images used by tags in the repository Candidate = Image.alias()
tagged_images = (Image Tagged = Image.alias()
.select(Image.id, Image.ancestors) ancestor_superset = Tagged.ancestors ** db_concat_func(Candidate.ancestors, Candidate.id, '/%')
.join(RepositoryTag)
.where(Image.repository == repo))
def gen_referenced_ancestors(): # We are going to compute all images which are being referenced in two ways:
for tagged_image in tagged_images: # First, we will find all images which have their ancestor paths appear in
# The ancestor list is in the format '/1/2/3/', extract just the ids # another image. Secondly, we union in all of the candidate images which are
ancestor_id_strings = tagged_image.ancestors.split('/')[1:-1] # directly referenced by a tag. This can be used in a subquery to directly
for img_id_str in ancestor_id_strings: # find which candidates are being referenced without any client side
yield int(img_id_str) # computation or extra round trips.
yield tagged_image.id ancestor_referenced = (Candidate
.select(Candidate.id)
.join(Tagged, on=ancestor_superset)
.join(RepositoryTag, on=(Tagged.id == RepositoryTag.image))
.where(RepositoryTag.repository == repo.id,
Candidate.id << candidates_orphans))
referenced_ancestors = set(gen_referenced_ancestors()) direct_referenced = (Candidate
.select(Candidate.id)
.join(RepositoryTag)
.where(RepositoryTag.repository == repo.id,
Candidate.id << candidates_orphans))
referenced_candidates = (direct_referenced | ancestor_referenced)
# We desire two pieces of information from the database from the following # We desire two pieces of information from the database from the following
# query: all of the image ids which are associated with this repository, # query: all of the image ids which are associated with this repository,
@ -171,13 +178,18 @@ def garbage_collect_repo(repo):
# code, which is overkill for just two fields, we use a tuple query, and # code, which is overkill for just two fields, we use a tuple query, and
# feed that directly to the dictionary tuple constructor which takes an # feed that directly to the dictionary tuple constructor which takes an
# iterable of tuples containing [(k, v), (k, v), ...] # iterable of tuples containing [(k, v), (k, v), ...]
all_repo_images = Image.select(Image.id, Image.storage).where(Image.repository == repo).tuples() unreferenced_candidates = (Image
images_to_storages = dict(all_repo_images) .select(Image.id, Image.storage)
to_remove = list(set(images_to_storages.keys()).difference(referenced_ancestors)) .where(Image.id << candidates_orphans,
~(Image.id << referenced_candidates))
.tuples())
unreferecend_images_to_storages = dict(unreferenced_candidates)
to_remove = unreferecend_images_to_storages.keys()
if len(to_remove) > 0: if len(to_remove) > 0:
logger.info('Cleaning up unreferenced images: %s', to_remove) logger.info('Cleaning up unreferenced images: %s', to_remove)
storage_id_whitelist = {images_to_storages[to_remove_id] for to_remove_id in to_remove} storage_id_whitelist = set(unreferecend_images_to_storages.values())
# Lookup any derived images for the images to remove. # Lookup any derived images for the images to remove.
derived = DerivedStorageForImage.select().where( derived = DerivedStorageForImage.select().where(
@ -197,18 +209,20 @@ def garbage_collect_repo(repo):
.execute()) .execute())
except IntegrityError: except IntegrityError:
logger.info('Could not GC derived images %s; will try again soon', to_remove) logger.info('Could not GC derived images %s; will try again soon', to_remove)
return return False
try: try:
Image.delete().where(Image.id << to_remove).execute() Image.delete().where(Image.id << to_remove).execute()
except IntegrityError: except IntegrityError:
logger.info('Could not GC images %s; will try again soon', to_remove) logger.info('Could not GC images %s; will try again soon', to_remove)
return return False
if len(to_remove) > 0: if len(to_remove) > 0:
logger.info('Garbage collecting storage for images: %s', to_remove) logger.info('Garbage collecting storage for images: %s', to_remove)
storage.garbage_collect_storage(storage_id_whitelist) storage.garbage_collect_storage(storage_id_whitelist)
return True
def star_repository(user, repository): def star_repository(user, repository):
""" Stars a repository. """ """ Stars a repository. """

View file

@ -138,64 +138,97 @@ def delete_tag(namespace_name, repository_name, tag_name):
def garbage_collect_tags(repo): def garbage_collect_tags(repo):
expired_time = get_epoch_timestamp() - repo.namespace_user.removed_tag_expiration_s """ Remove all of the tags that have gone past their garbage collection
expiration window, and return a set of image ids which *may* have been
orphaned.
"""
def add_expiration_data(base_query):
expired_clause = get_epoch_timestamp() - Namespace.removed_tag_expiration_s
return (base_query
.switch(RepositoryTag)
.join(Repository)
.join(Namespace, on=(Repository.namespace_user == Namespace.id))
.where(~(RepositoryTag.lifetime_end_ts >> None),
RepositoryTag.lifetime_end_ts <= expired_clause))
return _delete_tags(repo, add_expiration_data)
tags_to_delete = list(RepositoryTag def purge_all_tags(repo):
.select(RepositoryTag.id) """ Remove all tags from the repository, and return a set of all of the images
.where(RepositoryTag.repository == repo, ids which are now orphaned.
~(RepositoryTag.lifetime_end_ts >> None), """
(RepositoryTag.lifetime_end_ts <= expired_time)) return _delete_tags(repo)
.order_by(RepositoryTag.id))
if len(tags_to_delete) > 0: def _delete_tags(repo, query_modifier=None):
with db_transaction(): """ Garbage collect the tags for a repository and return a set of the image
manifests_to_delete = list(TagManifest ids which may now be orphaned.
.select(TagManifest.id) """
.join(RepositoryTag) tags_to_delete_q = (RepositoryTag
.where(RepositoryTag.id << tags_to_delete)) .select(RepositoryTag.id, Image.ancestors, Image.id)
.join(Image)
.where(RepositoryTag.repository == repo))
num_deleted_manifests = 0 if query_modifier is not None:
if len(manifests_to_delete) > 0: tags_to_delete_q = query_modifier(tags_to_delete_q)
# Find the set of IDs for all the labels to delete.
manifest_labels_query = (TagManifestLabel
.select()
.where(TagManifestLabel.repository == repo,
TagManifestLabel.annotated << manifests_to_delete))
label_ids = [manifest_label.label_id for manifest_label in manifest_labels_query] tags_to_delete = list(tags_to_delete_q)
if label_ids:
# Delete all the mapping entries.
(TagManifestLabel
.delete()
.where(TagManifestLabel.repository == repo,
TagManifestLabel.annotated << manifests_to_delete)
.execute())
# Delete all the matching labels. if len(tags_to_delete) == 0:
Label.delete().where(Label.id << label_ids).execute() return set()
# Delete the tag manifests themselves. with db_transaction():
num_deleted_manifests = (TagManifest manifests_to_delete = list(TagManifest
.delete() .select(TagManifest.id)
.where(TagManifest.id << manifests_to_delete) .join(RepositoryTag)
.execute()) .where(RepositoryTag.id << tags_to_delete))
num_deleted_tags = (RepositoryTag num_deleted_manifests = 0
.delete() if len(manifests_to_delete) > 0:
.where(RepositoryTag.id << tags_to_delete) # Find the set of IDs for all the labels to delete.
.execute()) manifest_labels_query = (TagManifestLabel
.select()
.where(TagManifestLabel.repository == repo,
TagManifestLabel.annotated << manifests_to_delete))
logger.debug('Removed %s tags with %s manifests', num_deleted_tags, num_deleted_manifests) label_ids = [manifest_label.label_id for manifest_label in manifest_labels_query]
if label_ids:
# Delete all the mapping entries.
(TagManifestLabel
.delete()
.where(TagManifestLabel.repository == repo,
TagManifestLabel.annotated << manifests_to_delete)
.execute())
# Delete all the matching labels.
Label.delete().where(Label.id << label_ids).execute()
num_deleted_manifests = (TagManifest
.delete()
.where(TagManifest.id << manifests_to_delete)
.execute())
num_deleted_tags = (RepositoryTag
.delete()
.where(RepositoryTag.id << tags_to_delete)
.execute())
logger.debug('Removed %s tags with %s manifests', num_deleted_tags, num_deleted_manifests)
ancestors = reduce(lambda r, l: r | l,
(set(tag.image.ancestor_id_list()) for tag in tags_to_delete))
direct_referenced = {tag.image.id for tag in tags_to_delete}
return ancestors | direct_referenced
def _get_repo_tag_image(tag_name, include_storage, modifier): def _get_repo_tag_image(tag_name, include_storage, modifier):
query = Image.select().join(RepositoryTag) query = Image.select().join(RepositoryTag)
if include_storage: if include_storage:
query = (Image.select(Image, ImageStorage) query = (Image
.join(ImageStorage) .select(Image, ImageStorage)
.switch(Image) .join(ImageStorage)
.join(RepositoryTag)) .switch(Image)
.join(RepositoryTag))
images = _tag_alive(modifier(query.where(RepositoryTag.name == tag_name))) images = _tag_alive(modifier(query.where(RepositoryTag.name == tag_name)))
if not images: if not images:
@ -213,10 +246,11 @@ def get_repo_tag_image(repo, tag_name, include_storage=False):
def get_tag_image(namespace_name, repository_name, tag_name, include_storage=False): def get_tag_image(namespace_name, repository_name, tag_name, include_storage=False):
def modifier(query): def modifier(query):
return (query.switch(RepositoryTag) return (query
.join(Repository) .switch(RepositoryTag)
.join(Namespace) .join(Repository)
.where(Namespace.username == namespace_name, Repository.name == repository_name)) .join(Namespace)
.where(Namespace.username == namespace_name, Repository.name == repository_name))
return _get_repo_tag_image(tag_name, include_storage, modifier) return _get_repo_tag_image(tag_name, include_storage, modifier)

View file

@ -13,7 +13,7 @@ def image_view(image, image_map, include_ancestors=True):
command = image.command command = image.command
def docker_id(aid): def docker_id(aid):
if not aid or not aid in image_map: if aid not in image_map:
return '' return ''
return image_map[aid].docker_image_id return image_map[aid].docker_image_id
@ -30,14 +30,14 @@ def image_view(image, image_map, include_ancestors=True):
if include_ancestors: if include_ancestors:
# Calculate the ancestors string, with the DBID's replaced with the docker IDs. # Calculate the ancestors string, with the DBID's replaced with the docker IDs.
ancestors = [docker_id(a) for a in image.ancestors.split('/')] ancestors = [docker_id(a) for a in image.ancestor_id_list()]
image_data['ancestors'] = '/'.join(ancestors) image_data['ancestors'] = '/{0}/'.format('/'.join(ancestors))
return image_data return image_data
def historical_image_view(image, image_map): def historical_image_view(image, image_map):
ancestors = [image_map[a] for a in image.ancestors.split('/')[1:-1]] ancestors = [image_map[a] for a in image.ancestor_id_list()]
normal_view = image_view(image, image_map) normal_view = image_view(image, image_map)
normal_view['history'] = [image_view(parent, image_map, False) for parent in ancestors] normal_view['history'] = [image_view(parent, image_map, False) for parent in ancestors]
return normal_view return normal_view
@ -58,23 +58,23 @@ class RepositoryImageList(RepositoryParamResource):
all_images = model.image.get_repository_images_without_placements(repo) all_images = model.image.get_repository_images_without_placements(repo)
all_tags = model.tag.list_repository_tags(namespace, repository) all_tags = model.tag.list_repository_tags(namespace, repository)
tags_by_image_id = defaultdict(list) tags_by_docker_id = defaultdict(list)
found_image_ids = set() found_image_ids = set()
for tag in all_tags: for tag in all_tags:
tags_by_image_id[tag.image.docker_image_id].append(tag.name) tags_by_docker_id[tag.image.docker_image_id].append(tag.name)
found_image_ids.add(str(tag.image.id)) found_image_ids.add(tag.image.id)
found_image_ids.update(tag.image.ancestors.split('/')[1:-1]) found_image_ids.update(tag.image.ancestor_id_list())
image_map = {} image_map = {}
filtered_images = [] filtered_images = []
for image in all_images: for image in all_images:
if str(image.id) in found_image_ids: if image.id in found_image_ids:
image_map[str(image.id)] = image image_map[image.id] = image
filtered_images.append(image) filtered_images.append(image)
def add_tags(image_json): def add_tags(image_json):
image_json['tags'] = tags_by_image_id[image_json['id']] image_json['tags'] = tags_by_docker_id[image_json['id']]
return image_json return image_json
return { return {
@ -98,7 +98,7 @@ class RepositoryImage(RepositoryParamResource):
# Lookup all the ancestor images for the image. # Lookup all the ancestor images for the image.
image_map = {} image_map = {}
for current_image in model.image.get_parent_images(namespace, repository, image): for current_image in model.image.get_parent_images(namespace, repository, image):
image_map[str(current_image.id)] = current_image image_map[current_image.id] = current_image
return historical_image_view(image, image_map) return historical_image_view(image, image_map)

View file

@ -1,13 +1,13 @@
import unittest import unittest
import time import time
from contextlib import contextmanager
from playhouse.test_utils import assert_query_count from playhouse.test_utils import assert_query_count
from app import app, storage from app import app, storage
from initdb import setup_database_for_testing, finished_database_for_testing from initdb import setup_database_for_testing, finished_database_for_testing
from data import model, database from data import model, database
from data.database import Image, ImageStorage, DerivedStorageForImage, Label, TagManifestLabel from data.database import Image, ImageStorage, DerivedStorageForImage, Label, TagManifestLabel
from endpoints.v2.manifest import _generate_and_store_manifest
ADMIN_ACCESS_USER = 'devtable' ADMIN_ACCESS_USER = 'devtable'
@ -16,48 +16,6 @@ PUBLIC_USER = 'public'
REPO = 'somerepo' REPO = 'somerepo'
class assert_no_new_dangling_labels(object):
  """ Context manager asserting that GC leaves no additional dangling labels:
      the number of Label rows not referenced by any TagManifestLabel must be
      the same on exit as it was on entry.
  """
  def __init__(self):
    self.existing_count = 0

  def _get_dangling_count(self):
    all_label_ids = {label.id for label in Label.select()}
    referenced_ids = {mapping.label_id for mapping in TagManifestLabel.select()}
    return len(all_label_ids - referenced_ids)

  def __enter__(self):
    self.existing_count = self._get_dangling_count()
    return self

  def __exit__(self, exc_type, exc_val, exc_tb):
    assert self._get_dangling_count() == self.existing_count
class assert_no_new_dangling_storages(object):
  """ Context manager asserting that GC leaves no additional dangling storages:
      the number of ImageStorage rows referenced by neither an Image nor a
      DerivedStorageForImage must be the same on exit as it was on entry.
  """
  def __init__(self):
    self.existing_count = 0

  def _get_dangling_count(self):
    all_storage_ids = {record.id for record in ImageStorage.select()}
    referenced_by_image = {image.storage_id for image in Image.select()}
    referenced_by_derived = {derived.derivative_id
                             for derived in DerivedStorageForImage.select()}
    return len(all_storage_ids - referenced_by_image - referenced_by_derived)

  def __enter__(self):
    self.existing_count = self._get_dangling_count()
    return self

  def __exit__(self, exc_type, exc_val, exc_tb):
    assert self._get_dangling_count() == self.existing_count
class TestGarbageCollection(unittest.TestCase): class TestGarbageCollection(unittest.TestCase):
@staticmethod @staticmethod
def _set_tag_expiration_policy(namespace, expiration_s): def _set_tag_expiration_policy(namespace, expiration_s):
@ -78,7 +36,8 @@ class TestGarbageCollection(unittest.TestCase):
finished_database_for_testing(self) finished_database_for_testing(self)
self.ctx.__exit__(True, None, None) self.ctx.__exit__(True, None, None)
def createImage(self, docker_image_id, repository_obj, username): @staticmethod
def createImage(docker_image_id, repository_obj, username):
preferred = storage.preferred_locations[0] preferred = storage.preferred_locations[0]
image = model.image.find_create_or_link_image(docker_image_id, repository_obj, username, {}, image = model.image.find_create_or_link_image(docker_image_id, repository_obj, username, {},
preferred) preferred)
@ -91,10 +50,9 @@ class TestGarbageCollection(unittest.TestCase):
# Add some torrent info. # Add some torrent info.
try: try:
database.TorrentInfo.get(storage=image.storage)
except database.TorrentInfo.DoesNotExist:
model.storage.save_torrent_info(image.storage, 1, 'helloworld') model.storage.save_torrent_info(image.storage, 1, 'helloworld')
model.storage.save_torrent_info(image.storage, 2, 'helloworlds!')
except:
pass
# Add some additional placements to the image. # Add some additional placements to the image.
for location_name in ['local_eu']: for location_name in ['local_eu']:
@ -144,18 +102,17 @@ class TestGarbageCollection(unittest.TestCase):
return repo return repo
def gcNow(self, repository): def gcNow(self, repository):
model.repository.garbage_collect_repository(repository.namespace_user.username, repository.name) self.assertTrue(model.repository.garbage_collect_repo(repository))
def deleteTag(self, repository, tag, perform_gc=True): def deleteTag(self, repository, tag, perform_gc=True):
model.tag.delete_tag(repository.namespace_user.username, repository.name, tag) model.tag.delete_tag(repository.namespace_user.username, repository.name, tag)
if perform_gc: if perform_gc:
model.repository.garbage_collect_repository(repository.namespace_user.username, self.assertTrue(model.repository.garbage_collect_repo(repository))
repository.name)
def moveTag(self, repository, tag, docker_image_id): def moveTag(self, repository, tag, docker_image_id):
model.tag.create_or_update_tag(repository.namespace_user.username, repository.name, tag, model.tag.create_or_update_tag(repository.namespace_user.username, repository.name, tag,
docker_image_id) docker_image_id)
model.repository.garbage_collect_repository(repository.namespace_user.username, repository.name) self.assertTrue(model.repository.garbage_collect_repo(repository))
def assertNotDeleted(self, repository, *args): def assertNotDeleted(self, repository, *args):
for docker_image_id in args: for docker_image_id in args:
@ -173,19 +130,49 @@ class TestGarbageCollection(unittest.TestCase):
self.fail('Expected image %s to be deleted' % docker_image_id) self.fail('Expected image %s to be deleted' % docker_image_id)
@staticmethod
def _get_dangling_storage_count():
  """ Returns the number of ImageStorage rows that are referenced by neither
      an Image nor a DerivedStorageForImage — i.e. storages GC should have
      reclaimed.
  """
  storage_ids = set([current.id for current in ImageStorage.select()])
  referenced_by_image = set([image.storage_id for image in Image.select()])
  referenced_by_derived = set([derived.derivative_id
                               for derived in DerivedStorageForImage.select()])
  return len(storage_ids - referenced_by_image - referenced_by_derived)
@staticmethod
def _get_dangling_label_count():
  """ Returns the number of Label rows not referenced by any TagManifestLabel
      mapping — i.e. labels GC should have reclaimed.
  """
  label_ids = set([current.id for current in Label.select()])
  referenced_by_manifest = set([mlabel.label_id for mlabel in TagManifestLabel.select()])
  return len(label_ids - referenced_by_manifest)
@contextmanager
def assert_no_new_dangling_storages_or_labels(self):
  """ Specialized assertion for ensuring that GC cleans up all dangling storages
      and labels: snapshots both dangling counts, yields to the test body, then
      asserts neither count grew.

      NOTE: there is no try/finally around the yield, so if the with-body
      raises, the assertions are skipped and the original exception propagates.
  """
  # TODO: Consider also asserting the number of DB queries being performed.
  existing_storage_count = self._get_dangling_storage_count()
  existing_label_count = self._get_dangling_label_count()

  yield

  updated_storage_count = self._get_dangling_storage_count()
  self.assertEqual(updated_storage_count, existing_storage_count)

  updated_label_count = self._get_dangling_label_count()
  self.assertEqual(updated_label_count, existing_label_count)
def test_has_garbage(self): def test_has_garbage(self):
""" Remove all existing repositories, then add one without garbage, check, then add one with """ Remove all existing repositories, then add one without garbage, check, then add one with
garbage, and check again. garbage, and check again.
""" """
# Delete all existing repos. # Delete all existing repos.
for repo in database.Repository.select(): for repo in database.Repository.select().order_by(database.Repository.id):
model.repository.purge_repository(repo.namespace_user.username, repo.name) self.assertTrue(model.repository.purge_repository(repo.namespace_user.username, repo.name))
# Change the time machine expiration on the namespace. # Change the time machine expiration on the namespace.
(database.User.update(removed_tag_expiration_s=1000000000) (database.User
.where(database.User.username == ADMIN_ACCESS_USER) .update(removed_tag_expiration_s=1000000000)
.execute()) .where(database.User.username == ADMIN_ACCESS_USER)
.execute())
# Create a repository without any garbage. # Create a repository without any garbage.
repository = self.createRepository(latest=['i1', 'i2', 'i3']) repository = self.createRepository(latest=['i1', 'i2', 'i3'])
@ -200,9 +187,10 @@ class TestGarbageCollection(unittest.TestCase):
self.assertIsNone(model.repository.find_repository_with_garbage(1000000000)) self.assertIsNone(model.repository.find_repository_with_garbage(1000000000))
# Change the time machine expiration on the namespace. # Change the time machine expiration on the namespace.
(database.User.update(removed_tag_expiration_s=0) (database.User
.where(database.User.username == ADMIN_ACCESS_USER) .update(removed_tag_expiration_s=0)
.execute()) .where(database.User.username == ADMIN_ACCESS_USER)
.execute())
# Now we should find the repository for GC. # Now we should find the repository for GC.
repository = model.repository.find_repository_with_garbage(0) repository = model.repository.find_repository_with_garbage(0)
@ -210,191 +198,158 @@ class TestGarbageCollection(unittest.TestCase):
self.assertEquals(REPO, repository.name) self.assertEquals(REPO, repository.name)
# GC the repository. # GC the repository.
model.repository.garbage_collect_repository(repository.namespace_user.username, repository.name) self.assertTrue(model.repository.garbage_collect_repo(repository))
# There should now be no repositories with garbage. # There should now be no repositories with garbage.
self.assertIsNone(model.repository.find_repository_with_garbage(0)) self.assertIsNone(model.repository.find_repository_with_garbage(0))
def test_find_garbage_policy_functions(self): def test_find_garbage_policy_functions(self):
with assert_query_count(1): with assert_query_count(1):
one_policy = model.repository.get_random_gc_policy() one_policy = model.repository.get_random_gc_policy()
all_policies = model.repository._get_gc_expiration_policies() all_policies = model.repository._get_gc_expiration_policies()
self.assertIn(one_policy, all_policies) self.assertIn(one_policy, all_policies)
def test_one_tag(self): def test_one_tag(self):
""" Create a repository with a single tag, then remove that tag and verify that the repository """ Create a repository with a single tag, then remove that tag and verify that the repository
is now empty. """ is now empty. """
with assert_no_new_dangling_labels(): with self.assert_no_new_dangling_storages_or_labels():
with assert_no_new_dangling_storages(): repository = self.createRepository(latest=['i1', 'i2', 'i3'])
repository = self.createRepository(latest=['i1', 'i2', 'i3']) self.deleteTag(repository, 'latest')
self.deleteTag(repository, 'latest') self.assertDeleted(repository, 'i1', 'i2', 'i3')
self.assertDeleted(repository, 'i1', 'i2', 'i3')
def test_two_tags_unshared_images(self): def test_two_tags_unshared_images(self):
""" Repository has two tags with no shared images between them. """ """ Repository has two tags with no shared images between them. """
with assert_no_new_dangling_labels(): with self.assert_no_new_dangling_storages_or_labels():
with assert_no_new_dangling_storages(): repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['f1', 'f2'])
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['f1', 'f2']) self.deleteTag(repository, 'latest')
self.deleteTag(repository, 'latest') self.assertDeleted(repository, 'i1', 'i2', 'i3')
self.assertDeleted(repository, 'i1', 'i2', 'i3') self.assertNotDeleted(repository, 'f1', 'f2')
self.assertNotDeleted(repository, 'f1', 'f2')
def test_two_tags_shared_images(self): def test_two_tags_shared_images(self):
""" Repository has two tags with shared images. Deleting the tag should only remove the """ Repository has two tags with shared images. Deleting the tag should only remove the
unshared images. unshared images.
""" """
with assert_no_new_dangling_labels(): with self.assert_no_new_dangling_storages_or_labels():
with assert_no_new_dangling_storages(): repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1'])
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1']) self.deleteTag(repository, 'latest')
self.deleteTag(repository, 'latest') self.assertDeleted(repository, 'i2', 'i3')
self.assertDeleted(repository, 'i2', 'i3') self.assertNotDeleted(repository, 'i1', 'f1')
self.assertNotDeleted(repository, 'i1', 'f1')
def test_unrelated_repositories(self): def test_unrelated_repositories(self):
""" Two repositories with different images. Removing the tag from one leaves the other's """ Two repositories with different images. Removing the tag from one leaves the other's
images intact. images intact.
""" """
with assert_no_new_dangling_labels(): with self.assert_no_new_dangling_storages_or_labels():
with assert_no_new_dangling_storages(): repository1 = self.createRepository(latest=['i1', 'i2', 'i3'], name='repo1')
repository1 = self.createRepository(latest=['i1', 'i2', 'i3'], name='repo1') repository2 = self.createRepository(latest=['j1', 'j2', 'j3'], name='repo2')
repository2 = self.createRepository(latest=['j1', 'j2', 'j3'], name='repo2')
self.deleteTag(repository1, 'latest') self.deleteTag(repository1, 'latest')
self.assertDeleted(repository1, 'i1', 'i2', 'i3')
self.assertNotDeleted(repository2, 'j1', 'j2', 'j3')
self.assertDeleted(repository1, 'i1', 'i2', 'i3')
self.assertNotDeleted(repository2, 'j1', 'j2', 'j3')
def test_related_repositories(self): def test_related_repositories(self):
""" Two repositories with shared images. Removing the tag from one leaves the other's """ Two repositories with shared images. Removing the tag from one leaves the other's
images intact. images intact.
""" """
with assert_no_new_dangling_labels(): with self.assert_no_new_dangling_storages_or_labels():
with assert_no_new_dangling_storages(): repository1 = self.createRepository(latest=['i1', 'i2', 'i3'], name='repo1')
repository1 = self.createRepository(latest=['i1', 'i2', 'i3'], name='repo1') repository2 = self.createRepository(latest=['i1', 'i2', 'j1'], name='repo2')
repository2 = self.createRepository(latest=['i1', 'i2', 'j1'], name='repo2')
self.deleteTag(repository1, 'latest') self.deleteTag(repository1, 'latest')
self.assertDeleted(repository1, 'i3')
self.assertNotDeleted(repository2, 'i1', 'i2', 'j1')
self.assertDeleted(repository1, 'i3')
self.assertNotDeleted(repository2, 'i1', 'i2', 'j1')
def test_inaccessible_repositories(self): def test_inaccessible_repositories(self):
""" Two repositories under different namespaces should result in the images being deleted """ Two repositories under different namespaces should result in the images being deleted
but not completely removed from the database. but not completely removed from the database.
""" """
with assert_no_new_dangling_labels(): with self.assert_no_new_dangling_storages_or_labels():
with assert_no_new_dangling_storages(): repository1 = self.createRepository(namespace=ADMIN_ACCESS_USER, latest=['i1', 'i2', 'i3'])
repository1 = self.createRepository(namespace=ADMIN_ACCESS_USER, latest=['i1', 'i2', 'i3']) repository2 = self.createRepository(namespace=PUBLIC_USER, latest=['i1', 'i2', 'i3'])
repository2 = self.createRepository(namespace=PUBLIC_USER, latest=['i1', 'i2', 'i3'])
self.deleteTag(repository1, 'latest')
self.assertDeleted(repository1, 'i1', 'i2', 'i3')
self.assertNotDeleted(repository2, 'i1', 'i2', 'i3')
self.deleteTag(repository1, 'latest')
self.assertDeleted(repository1, 'i1', 'i2', 'i3')
self.assertNotDeleted(repository2, 'i1', 'i2', 'i3')
def test_multiple_shared_images(self): def test_multiple_shared_images(self):
""" Repository has multiple tags with shared images. Selectively deleting the tags, and """ Repository has multiple tags with shared images. Selectively deleting the tags, and
verifying at each step. verifying at each step.
""" """
with assert_no_new_dangling_labels(): with self.assert_no_new_dangling_storages_or_labels():
with assert_no_new_dangling_storages(): repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'],
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'], third=['t1', 't2', 't3'], fourth=['i1', 'f1'])
third=['t1', 't2', 't3'], fourth=['i1', 'f1'])
# Delete tag other. Should delete f2, since it is not shared. # Delete tag other. Should delete f2, since it is not shared.
self.deleteTag(repository, 'other') self.deleteTag(repository, 'other')
self.assertDeleted(repository, 'f2') self.assertDeleted(repository, 'f2')
self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3', 'f1') self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3', 'f1')
# Move tag fourth to i3. This should remove f1 since it is no longer referenced. # Move tag fourth to i3. This should remove f1 since it is no longer referenced.
self.moveTag(repository, 'fourth', 'i3') self.moveTag(repository, 'fourth', 'i3')
self.assertDeleted(repository, 'f1') self.assertDeleted(repository, 'f1')
self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3') self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3')
# Delete tag 'latest'. This should do nothing since fourth is on the same branch. # Delete tag 'latest'. This should do nothing since fourth is on the same branch.
self.deleteTag(repository, 'latest') self.deleteTag(repository, 'latest')
self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3') self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3')
# Delete tag 'third'. This should remove t1->t3. # Delete tag 'third'. This should remove t1->t3.
self.deleteTag(repository, 'third') self.deleteTag(repository, 'third')
self.assertDeleted(repository, 't1', 't2', 't3') self.assertDeleted(repository, 't1', 't2', 't3')
self.assertNotDeleted(repository, 'i1', 'i2', 'i3') self.assertNotDeleted(repository, 'i1', 'i2', 'i3')
# Add tag to i1. # Add tag to i1.
self.moveTag(repository, 'newtag', 'i1') self.moveTag(repository, 'newtag', 'i1')
self.assertNotDeleted(repository, 'i1', 'i2', 'i3') self.assertNotDeleted(repository, 'i1', 'i2', 'i3')
# Delete tag 'fourth'. This should remove i2 and i3. # Delete tag 'fourth'. This should remove i2 and i3.
self.deleteTag(repository, 'fourth') self.deleteTag(repository, 'fourth')
self.assertDeleted(repository, 'i2', 'i3') self.assertDeleted(repository, 'i2', 'i3')
self.assertNotDeleted(repository, 'i1') self.assertNotDeleted(repository, 'i1')
# Delete tag 'newtag'. This should remove the remaining image.
self.deleteTag(repository, 'newtag')
self.assertDeleted(repository, 'i1')
# Delete tag 'newtag'. This should remove the remaining image.
self.deleteTag(repository, 'newtag')
self.assertDeleted(repository, 'i1')
def test_empty_gc(self): def test_empty_gc(self):
with assert_no_new_dangling_labels(): with self.assert_no_new_dangling_storages_or_labels():
with assert_no_new_dangling_storages(): repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'],
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'], third=['t1', 't2', 't3'], fourth=['i1', 'f1'])
third=['t1', 't2', 't3'], fourth=['i1', 'f1'])
self.gcNow(repository)
self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3', 'f1', 'f2')
self.gcNow(repository)
self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3', 'f1', 'f2')
def test_time_machine_no_gc(self): def test_time_machine_no_gc(self):
""" Repository has two tags with shared images. Deleting the tag should not remove any images """ Repository has two tags with shared images. Deleting the tag should not remove any images
""" """
with assert_no_new_dangling_labels(): with self.assert_no_new_dangling_storages_or_labels():
with assert_no_new_dangling_storages(): repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1'])
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1']) self._set_tag_expiration_policy(repository.namespace_user.username, 60*60*24)
self._set_tag_expiration_policy(repository.namespace_user.username, 60*60*24)
self.deleteTag(repository, 'latest')
self.assertNotDeleted(repository, 'i2', 'i3')
self.assertNotDeleted(repository, 'i1', 'f1')
self.deleteTag(repository, 'latest')
self.assertNotDeleted(repository, 'i2', 'i3')
self.assertNotDeleted(repository, 'i1', 'f1')
def test_time_machine_gc(self): def test_time_machine_gc(self):
""" Repository has two tags with shared images. Deleting the second tag should cause the images """ Repository has two tags with shared images. Deleting the second tag should cause the images
for the first deleted tag to gc. for the first deleted tag to gc.
""" """
with assert_no_new_dangling_labels(): with self.assert_no_new_dangling_storages_or_labels():
with assert_no_new_dangling_storages(): repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1'])
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1'])
self._set_tag_expiration_policy(repository.namespace_user.username, 1) self._set_tag_expiration_policy(repository.namespace_user.username, 1)
self.deleteTag(repository, 'latest') self.deleteTag(repository, 'latest')
self.assertNotDeleted(repository, 'i2', 'i3') self.assertNotDeleted(repository, 'i2', 'i3')
self.assertNotDeleted(repository, 'i1', 'f1') self.assertNotDeleted(repository, 'i1', 'f1')
time.sleep(2) time.sleep(2)
self.deleteTag(repository, 'other') # This will cause the images associated with latest to gc self.deleteTag(repository, 'other') # This will cause the images associated with latest to gc
self.assertDeleted(repository, 'i2', 'i3') self.assertDeleted(repository, 'i2', 'i3')
self.assertNotDeleted(repository, 'i1', 'f1') self.assertNotDeleted(repository, 'i1', 'f1')
def test_manifest_gc(self):
with assert_no_new_dangling_labels():
with assert_no_new_dangling_storages():
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1'])
_generate_and_store_manifest(ADMIN_ACCESS_USER, REPO, 'latest')
self._set_tag_expiration_policy(repository.namespace_user.username, 0)
self.deleteTag(repository, 'latest')
self.assertDeleted(repository, 'i2', 'i3')
if __name__ == '__main__': if __name__ == '__main__':

View file

@ -20,7 +20,8 @@ class TestImageTree(unittest.TestCase):
finished_database_for_testing(self) finished_database_for_testing(self)
self.ctx.__exit__(True, None, None) self.ctx.__exit__(True, None, None)
def _get_base_image(self, all_images): @staticmethod
def _get_base_image(all_images):
for image in all_images: for image in all_images:
if image.ancestors == '/': if image.ancestors == '/':
return image return image
@ -90,29 +91,27 @@ class TestImageTree(unittest.TestCase):
# still return the tag that contains them. # still return the tag that contains them.
self.assertEquals('staging', tree.tag_containing_image(result[0])) self.assertEquals('staging', tree.tag_containing_image(result[0]))
def test_longest_path_simple_repo_direct_lookup(self): def test_longest_path_simple_repo_direct_lookup(self):
repository = model.repository.get_repository(NAMESPACE, SIMPLE_REPO) repository = model.repository.get_repository(NAMESPACE, SIMPLE_REPO)
all_images = list(model.image.get_repository_images(NAMESPACE, SIMPLE_REPO)) all_images = list(model.image.get_repository_images(NAMESPACE, SIMPLE_REPO))
all_tags = list(model.tag.list_repository_tags(NAMESPACE, SIMPLE_REPO)) all_tags = list(model.tag.list_repository_tags(NAMESPACE, SIMPLE_REPO))
base_image = self._get_base_image(all_images) base_image = self._get_base_image(all_images)
tag_image = all_tags[0].image
def checker(index, image): def checker(index, image):
return True return True
filtered_images = model.image.get_repository_images_without_placements(repository, filtered_images = model.image.get_repository_images_without_placements(
with_ancestor=base_image) repository,
with_ancestor=base_image)
self.assertEquals(set([f.id for f in filtered_images]), set([a.id for a in all_images])) self.assertEquals(set([f.id for f in filtered_images]), set([a.id for a in all_images]))
tree = ImageTree(filtered_images, all_tags) tree = ImageTree(filtered_images, all_tags)
ancestors = tag_image.ancestors.split('/')[2:-1] # Skip the first image.
result = tree.find_longest_path(base_image.id, checker) result = tree.find_longest_path(base_image.id, checker)
self.assertEquals(3, len(result)) self.assertEquals(3, len(result))
self.assertEquals('latest', tree.tag_containing_image(result[-1])) self.assertEquals('latest', tree.tag_containing_image(result[-1]))
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View file

@ -36,7 +36,8 @@ class TestManifests(unittest.TestCase):
def _perform_cleanup(self): def _perform_cleanup(self):
database.RepositoryTag.delete().where(database.RepositoryTag.hidden == True).execute() database.RepositoryTag.delete().where(database.RepositoryTag.hidden == True).execute()
model.repository.garbage_collect_repository(ADMIN_ACCESS_USER, REPO) repo_object = model.repository.get_repository(ADMIN_ACCESS_USER, REPO)
model.repository.garbage_collect_repo(repo_object)
def test_missing_link(self): def test_missing_link(self):
""" Tests for a corner case that could result in missing a link to a blob referenced by a """ Tests for a corner case that could result in missing a link to a blob referenced by a

View file

@ -1,3 +1,6 @@
from collections import defaultdict
class ImageTreeNode(object): class ImageTreeNode(object):
""" A node in the image tree. """ """ A node in the image tree. """
def __init__(self, image, child_map): def __init__(self, image, child_map):
@ -9,7 +12,7 @@ class ImageTreeNode(object):
@property @property
def children(self): def children(self):
return self._child_map.get(str(self.image.id), []) return self._child_map[self.image.id]
def add_tag(self, tag): def add_tag(self, tag):
self.tags.append(tag) self.tags.append(tag)
@ -20,18 +23,18 @@ class ImageTree(object):
def __init__(self, all_images, all_tags, base_filter=None): def __init__(self, all_images, all_tags, base_filter=None):
self._image_map = {} self._image_map = {}
self._child_map = {} self._child_map = defaultdict(list)
self._build(all_images, all_tags, base_filter) self._build(all_images, all_tags, base_filter)
def _build(self, all_images, all_tags, base_filter=None): def _build(self, all_images, all_tags, base_filter=None):
# Build nodes for each of the images. # Build nodes for each of the images.
for image in all_images: for image in all_images:
ancestors = image.ancestors.split('/')[1:-1] ancestors = image.ancestor_id_list()
# Filter any unneeded images. # Filter any unneeded images.
if base_filter is not None: if base_filter is not None:
if image.id != base_filter and not str(base_filter) in ancestors: if image.id != base_filter and not base_filter in ancestors:
continue continue
# Create the node for the image. # Create the node for the image.
@ -39,11 +42,8 @@ class ImageTree(object):
self._image_map[image.id] = image_node self._image_map[image.id] = image_node
# Add the node to the child map for its parent image (if any). # Add the node to the child map for its parent image (if any).
parent_image_id = image.ancestors.split('/')[-2] if image.ancestors else None parent_image_id = image.parent_id
if parent_image_id: if parent_image_id is not None:
if not parent_image_id in self._child_map:
self._child_map[parent_image_id] = []
self._child_map[parent_image_id].append(image_node) self._child_map[parent_image_id].append(image_node)
# Build the tag map. # Build the tag map.
@ -54,7 +54,6 @@ class ImageTree(object):
image_node.add_tag(tag.name) image_node.add_tag(tag.name)
def find_longest_path(self, image_id, checker): def find_longest_path(self, image_id, checker):
""" Returns a list of images representing the longest path that matches the given """ Returns a list of images representing the longest path that matches the given
checker function, starting from the given image_id *exclusive*. checker function, starting from the given image_id *exclusive*.
@ -65,7 +64,6 @@ class ImageTree(object):
return self._find_longest_path(start_node, checker, -1)[1:] return self._find_longest_path(start_node, checker, -1)[1:]
def _find_longest_path(self, image_node, checker, index): def _find_longest_path(self, image_node, checker, index):
found_path = [] found_path = []
@ -79,7 +77,6 @@ class ImageTree(object):
return [image_node.image] + found_path return [image_node.image] + found_path
def tag_containing_image(self, image): def tag_containing_image(self, image):
""" Returns the name of the closest tag containing the given image. """ """ Returns the name of the closest tag containing the given image. """
if not image: if not image:
@ -99,4 +96,4 @@ class ImageTree(object):
if found is not None: if found is not None:
return found return found
return None return None

View file

@ -35,7 +35,7 @@ def backfill_aggregate_sizes():
aggregate_size = image.storage.image_size aggregate_size = image.storage.image_size
image_ids = image.ancestors.split('/')[1:-1] image_ids = image.ancestor_id_list()
for image_id in image_ids: for image_id in image_ids:
to_add = db_for_update(Image to_add = db_for_update(Image
.select(Image, ImageStorage) .select(Image, ImageStorage)