Merge pull request #1772 from jakedt/alternategc

Alternategc
This commit is contained in:
Jake Moshenko 2016-09-07 13:41:30 -04:00 committed by GitHub
commit 4d11e9b4f5
10 changed files with 358 additions and 325 deletions

View file

@ -34,6 +34,7 @@ _SCHEME_DRIVERS = {
'postgresql+psycopg2': PostgresqlDatabase,
}
SCHEME_RANDOM_FUNCTION = {
'mysql': fn.Rand,
'mysql+pymysql': fn.Rand,
@ -42,12 +43,37 @@ SCHEME_RANDOM_FUNCTION = {
'postgresql+psycopg2': fn.Random,
}
def pipes_concat(arg1, arg2, *extra_args):
""" Concat function for sqlite, since it doesn't support fn.Concat.
Concatenates clauses with || characters.
"""
reduced = arg1.concat(arg2)
for arg in extra_args:
reduced = reduced.concat(arg)
return reduced
def function_concat(arg1, arg2, *extra_args):
""" Default implementation of concat which uses fn.Concat(). Used by all
database engines except sqlite.
"""
return fn.Concat(arg1, arg2, *extra_args)
SCHEME_SPECIALIZED_CONCAT = {
'sqlite': pipes_concat,
}
def real_for_update(query):
return query.for_update()
def null_for_update(query):
return query
def delete_instance_filtered(instance, model_class, delete_nullable, skip_transitive_deletes):
""" Deletes the DB instance recursively, skipping any models in the skip_transitive_deletes set.
@ -181,6 +207,7 @@ read_slave = Proxy()
db_random_func = CallableProxy()
db_for_update = CallableProxy()
db_transaction = CallableProxy()
db_concat_func = CallableProxy()
def validate_database_url(url, db_kwargs, connect_timeout=5):
@ -227,6 +254,8 @@ def configure(config_object):
db_random_func.initialize(SCHEME_RANDOM_FUNCTION[parsed_write_uri.drivername])
db_for_update.initialize(SCHEME_SPECIALIZED_FOR_UPDATE.get(parsed_write_uri.drivername,
real_for_update))
db_concat_func.initialize(SCHEME_SPECIALIZED_CONCAT.get(parsed_write_uri.drivername,
function_concat))
read_slave_uri = config_object.get('DB_READ_SLAVE_URI', None)
if read_slave_uri is not None:
@ -616,6 +645,12 @@ class Image(BaseModel):
(('security_indexed_engine', 'security_indexed'), False),
)
def ancestor_id_list(self):
""" Returns an integer list of ancestor ids, ordered chronologically from
root to direct parent.
"""
return map(int, self.ancestors.split('/')[1:-1])
_ImageProxy.initialize(Image)

View file

@ -397,9 +397,7 @@ def get_repo_image_by_storage_checksum(namespace, repository_name, storage_check
def get_image_layers(image):
""" Returns a list of the full layers of an image, including itself (if specified), sorted
from base image outward. """
ancestors = image.ancestors.split('/')[1:-1]
image_ids = [ancestor_id for ancestor_id in ancestors if ancestor_id]
image_ids.append(str(image.id))
image_ids = image.ancestor_id_list() + [image.id]
query = (ImageStoragePlacement
.select(ImageStoragePlacement, Image, ImageStorage, ImageStorageLocation)
@ -410,7 +408,7 @@ def get_image_layers(image):
.where(Image.id << image_ids))
image_list = list(invert_placement_query_results(query))
image_list.sort(key=lambda image: image_ids.index(str(image.id)))
image_list.sort(key=lambda img: image_ids.index(img.id))
return image_list

View file

@ -11,7 +11,7 @@ from data.database import (Repository, Namespace, RepositoryTag, Star, Image, Us
Visibility, RepositoryPermission, RepositoryActionCount,
Role, RepositoryAuthorizedEmail, TagManifest, DerivedStorageForImage,
Label, TagManifestLabel, db_for_update, get_epoch_timestamp,
db_random_func)
db_random_func, db_concat_func)
logger = logging.getLogger(__name__)
@ -43,50 +43,38 @@ def get_repository(namespace_name, repository_name):
return None
def _purge_all_repository_tags(namespace_name, repository_name):
""" Immediately purge all repository tags without respecting the lifeline procedure """
try:
repo = _basequery.get_existing_repository(namespace_name, repository_name)
except Repository.DoesNotExist:
raise DataModelException('Invalid repository \'%s/%s\'' %
(namespace_name, repository_name))
# Finds all the tags to delete.
repo_tags = list(RepositoryTag.select().where(RepositoryTag.repository == repo.id))
if not repo_tags:
return
# Find all labels to delete.
manifest_labels_query = (TagManifestLabel
.select()
.where(TagManifestLabel.repository == repo))
label_ids = [manifest_label.label_id for manifest_label in manifest_labels_query]
if label_ids:
# Delete all the mapping entries.
TagManifestLabel.delete().where(TagManifestLabel.repository == repo).execute()
# Delete all the matching labels.
Label.delete().where(Label.id << label_ids).execute()
# Delete all the manifests.
TagManifest.delete().where(TagManifest.tag << repo_tags).execute()
# Delete all tags.
RepositoryTag.delete().where(RepositoryTag.repository == repo.id).execute()
def purge_repository(namespace_name, repository_name):
""" Completely delete all traces of the repository. Will return True upon
complete success, and False upon partial or total failure. Garbage
collection is incremental and repeatable, so this return value does
not need to be checked or responded to.
"""
repo = _basequery.get_existing_repository(namespace_name, repository_name)
# Delete all tags to allow gc to reclaim storage
_purge_all_repository_tags(namespace_name, repository_name)
previously_referenced = tag.purge_all_tags(repo)
unreferenced_image_q = Image.select(Image.id).where(Image.repository == repo)
if len(previously_referenced) > 0:
unreferenced_image_q = (unreferenced_image_q
.where(~(Image.id << list(previously_referenced))))
unreferenced_candidates = set(img[0] for img in unreferenced_image_q.tuples())
# Gc to remove the images and storage
garbage_collect_repository(namespace_name, repository_name)
all_repo_images = previously_referenced | unreferenced_candidates
successful_gc = garbage_collect_repo(repo, all_repo_images)
if not successful_gc:
return False
# Delete the rest of the repository metadata
fetched = _basequery.get_existing_repository(namespace_name, repository_name)
fetched.delete_instance(recursive=True, delete_nullable=False)
return True
@ttl_cache(maxsize=1, ttl=600)
def _get_gc_expiration_policies():
@ -135,34 +123,53 @@ def find_repository_with_garbage(limit_to_gc_policy_s):
return None
def garbage_collect_repository(namespace_name, repository_name):
repo = get_repository(namespace_name, repository_name)
if repo is not None:
garbage_collect_repo(repo)
def garbage_collect_repo(repo):
def garbage_collect_repo(repo, extra_candidate_set=None):
""" Garbage collect the specified repository object. This will remove all
images, derived images, and other associated metadata, for images which
are no longer referenced by a tag or another image which is itself
tagged. Returns True if garbage collection was completed without error
and False otherwise. Retries are safe and work incrementally, so this
return value does not need to be checked or handled.
"""
logger.debug('Garbage collecting repository %s', repo.id)
storage_id_whitelist = set()
tag.garbage_collect_tags(repo)
candidate_orphan_image_set = tag.garbage_collect_tags(repo)
if extra_candidate_set:
candidate_orphan_image_set.update(extra_candidate_set)
if not len(candidate_orphan_image_set):
logger.debug('No candidate images for GC for repo: %s', repo.id)
return True
candidates_orphans = list(candidate_orphan_image_set)
with db_transaction():
# Get a list of all images used by tags in the repository
tagged_images = (Image
.select(Image.id, Image.ancestors)
.join(RepositoryTag)
.where(Image.repository == repo))
Candidate = Image.alias()
Tagged = Image.alias()
ancestor_superset = Tagged.ancestors ** db_concat_func(Candidate.ancestors, Candidate.id, '/%')
def gen_referenced_ancestors():
for tagged_image in tagged_images:
# The ancestor list is in the format '/1/2/3/', extract just the ids
ancestor_id_strings = tagged_image.ancestors.split('/')[1:-1]
for img_id_str in ancestor_id_strings:
yield int(img_id_str)
yield tagged_image.id
# We are going to compute all images which are being referenced in two ways:
# First, we will find all images which have their ancestor paths appear in
# another image. Secondly, we union in all of the candidate images which are
# directly referenced by a tag. This can be used in a subquery to directly
# find which candidates are being referenced without any client side
# computation or extra round trips.
ancestor_referenced = (Candidate
.select(Candidate.id)
.join(Tagged, on=ancestor_superset)
.join(RepositoryTag, on=(Tagged.id == RepositoryTag.image))
.where(RepositoryTag.repository == repo.id,
Candidate.id << candidates_orphans))
referenced_ancestors = set(gen_referenced_ancestors())
direct_referenced = (Candidate
.select(Candidate.id)
.join(RepositoryTag)
.where(RepositoryTag.repository == repo.id,
Candidate.id << candidates_orphans))
referenced_candidates = (direct_referenced | ancestor_referenced)
# We desire two pieces of information from the database from the following
# query: all of the image ids which are associated with this repository,
@ -171,13 +178,18 @@ def garbage_collect_repo(repo):
# code, which is overkill for just two fields, we use a tuple query, and
# feed that directly to the dictionary tuple constructor which takes an
# iterable of tuples containing [(k, v), (k, v), ...]
all_repo_images = Image.select(Image.id, Image.storage).where(Image.repository == repo).tuples()
images_to_storages = dict(all_repo_images)
to_remove = list(set(images_to_storages.keys()).difference(referenced_ancestors))
unreferenced_candidates = (Image
.select(Image.id, Image.storage)
.where(Image.id << candidates_orphans,
~(Image.id << referenced_candidates))
.tuples())
unreferecend_images_to_storages = dict(unreferenced_candidates)
to_remove = unreferecend_images_to_storages.keys()
if len(to_remove) > 0:
logger.info('Cleaning up unreferenced images: %s', to_remove)
storage_id_whitelist = {images_to_storages[to_remove_id] for to_remove_id in to_remove}
storage_id_whitelist = set(unreferecend_images_to_storages.values())
# Lookup any derived images for the images to remove.
derived = DerivedStorageForImage.select().where(
@ -197,18 +209,20 @@ def garbage_collect_repo(repo):
.execute())
except IntegrityError:
logger.info('Could not GC derived images %s; will try again soon', to_remove)
return
return False
try:
Image.delete().where(Image.id << to_remove).execute()
except IntegrityError:
logger.info('Could not GC images %s; will try again soon', to_remove)
return
return False
if len(to_remove) > 0:
logger.info('Garbage collecting storage for images: %s', to_remove)
storage.garbage_collect_storage(storage_id_whitelist)
return True
def star_repository(user, repository):
""" Stars a repository. """

View file

@ -138,64 +138,97 @@ def delete_tag(namespace_name, repository_name, tag_name):
def garbage_collect_tags(repo):
expired_time = get_epoch_timestamp() - repo.namespace_user.removed_tag_expiration_s
""" Remove all of the tags that have gone past their garbage collection
expiration window, and return a set of image ids which *may* have been
orphaned.
"""
def add_expiration_data(base_query):
expired_clause = get_epoch_timestamp() - Namespace.removed_tag_expiration_s
return (base_query
.switch(RepositoryTag)
.join(Repository)
.join(Namespace, on=(Repository.namespace_user == Namespace.id))
.where(~(RepositoryTag.lifetime_end_ts >> None),
RepositoryTag.lifetime_end_ts <= expired_clause))
return _delete_tags(repo, add_expiration_data)
tags_to_delete = list(RepositoryTag
.select(RepositoryTag.id)
.where(RepositoryTag.repository == repo,
~(RepositoryTag.lifetime_end_ts >> None),
(RepositoryTag.lifetime_end_ts <= expired_time))
.order_by(RepositoryTag.id))
def purge_all_tags(repo):
""" Remove all tags from the repository, and return a set of all of the images
ids which are now orphaned.
"""
return _delete_tags(repo)
if len(tags_to_delete) > 0:
with db_transaction():
manifests_to_delete = list(TagManifest
.select(TagManifest.id)
.join(RepositoryTag)
.where(RepositoryTag.id << tags_to_delete))
def _delete_tags(repo, query_modifier=None):
""" Garbage collect the tags for a repository and return a set of the image
ids which may now be orphaned.
"""
tags_to_delete_q = (RepositoryTag
.select(RepositoryTag.id, Image.ancestors, Image.id)
.join(Image)
.where(RepositoryTag.repository == repo))
num_deleted_manifests = 0
if len(manifests_to_delete) > 0:
# Find the set of IDs for all the labels to delete.
manifest_labels_query = (TagManifestLabel
.select()
.where(TagManifestLabel.repository == repo,
TagManifestLabel.annotated << manifests_to_delete))
if query_modifier is not None:
tags_to_delete_q = query_modifier(tags_to_delete_q)
label_ids = [manifest_label.label_id for manifest_label in manifest_labels_query]
if label_ids:
# Delete all the mapping entries.
(TagManifestLabel
.delete()
.where(TagManifestLabel.repository == repo,
TagManifestLabel.annotated << manifests_to_delete)
.execute())
tags_to_delete = list(tags_to_delete_q)
# Delete all the matching labels.
Label.delete().where(Label.id << label_ids).execute()
if len(tags_to_delete) == 0:
return set()
# Delete the tag manifests themselves.
num_deleted_manifests = (TagManifest
.delete()
.where(TagManifest.id << manifests_to_delete)
.execute())
with db_transaction():
manifests_to_delete = list(TagManifest
.select(TagManifest.id)
.join(RepositoryTag)
.where(RepositoryTag.id << tags_to_delete))
num_deleted_tags = (RepositoryTag
.delete()
.where(RepositoryTag.id << tags_to_delete)
.execute())
num_deleted_manifests = 0
if len(manifests_to_delete) > 0:
# Find the set of IDs for all the labels to delete.
manifest_labels_query = (TagManifestLabel
.select()
.where(TagManifestLabel.repository == repo,
TagManifestLabel.annotated << manifests_to_delete))
logger.debug('Removed %s tags with %s manifests', num_deleted_tags, num_deleted_manifests)
label_ids = [manifest_label.label_id for manifest_label in manifest_labels_query]
if label_ids:
# Delete all the mapping entries.
(TagManifestLabel
.delete()
.where(TagManifestLabel.repository == repo,
TagManifestLabel.annotated << manifests_to_delete)
.execute())
# Delete all the matching labels.
Label.delete().where(Label.id << label_ids).execute()
num_deleted_manifests = (TagManifest
.delete()
.where(TagManifest.id << manifests_to_delete)
.execute())
num_deleted_tags = (RepositoryTag
.delete()
.where(RepositoryTag.id << tags_to_delete)
.execute())
logger.debug('Removed %s tags with %s manifests', num_deleted_tags, num_deleted_manifests)
ancestors = reduce(lambda r, l: r | l,
(set(tag.image.ancestor_id_list()) for tag in tags_to_delete))
direct_referenced = {tag.image.id for tag in tags_to_delete}
return ancestors | direct_referenced
def _get_repo_tag_image(tag_name, include_storage, modifier):
query = Image.select().join(RepositoryTag)
if include_storage:
query = (Image.select(Image, ImageStorage)
.join(ImageStorage)
.switch(Image)
.join(RepositoryTag))
query = (Image
.select(Image, ImageStorage)
.join(ImageStorage)
.switch(Image)
.join(RepositoryTag))
images = _tag_alive(modifier(query.where(RepositoryTag.name == tag_name)))
if not images:
@ -213,10 +246,11 @@ def get_repo_tag_image(repo, tag_name, include_storage=False):
def get_tag_image(namespace_name, repository_name, tag_name, include_storage=False):
def modifier(query):
return (query.switch(RepositoryTag)
.join(Repository)
.join(Namespace)
.where(Namespace.username == namespace_name, Repository.name == repository_name))
return (query
.switch(RepositoryTag)
.join(Repository)
.join(Namespace)
.where(Namespace.username == namespace_name, Repository.name == repository_name))
return _get_repo_tag_image(tag_name, include_storage, modifier)

View file

@ -13,7 +13,7 @@ def image_view(image, image_map, include_ancestors=True):
command = image.command
def docker_id(aid):
if not aid or not aid in image_map:
if aid not in image_map:
return ''
return image_map[aid].docker_image_id
@ -30,14 +30,14 @@ def image_view(image, image_map, include_ancestors=True):
if include_ancestors:
# Calculate the ancestors string, with the DBID's replaced with the docker IDs.
ancestors = [docker_id(a) for a in image.ancestors.split('/')]
image_data['ancestors'] = '/'.join(ancestors)
ancestors = [docker_id(a) for a in image.ancestor_id_list()]
image_data['ancestors'] = '/{0}/'.format('/'.join(ancestors))
return image_data
def historical_image_view(image, image_map):
ancestors = [image_map[a] for a in image.ancestors.split('/')[1:-1]]
ancestors = [image_map[a] for a in image.ancestor_id_list()]
normal_view = image_view(image, image_map)
normal_view['history'] = [image_view(parent, image_map, False) for parent in ancestors]
return normal_view
@ -58,23 +58,23 @@ class RepositoryImageList(RepositoryParamResource):
all_images = model.image.get_repository_images_without_placements(repo)
all_tags = model.tag.list_repository_tags(namespace, repository)
tags_by_image_id = defaultdict(list)
tags_by_docker_id = defaultdict(list)
found_image_ids = set()
for tag in all_tags:
tags_by_image_id[tag.image.docker_image_id].append(tag.name)
found_image_ids.add(str(tag.image.id))
found_image_ids.update(tag.image.ancestors.split('/')[1:-1])
tags_by_docker_id[tag.image.docker_image_id].append(tag.name)
found_image_ids.add(tag.image.id)
found_image_ids.update(tag.image.ancestor_id_list())
image_map = {}
filtered_images = []
for image in all_images:
if str(image.id) in found_image_ids:
image_map[str(image.id)] = image
if image.id in found_image_ids:
image_map[image.id] = image
filtered_images.append(image)
def add_tags(image_json):
image_json['tags'] = tags_by_image_id[image_json['id']]
image_json['tags'] = tags_by_docker_id[image_json['id']]
return image_json
return {
@ -98,7 +98,7 @@ class RepositoryImage(RepositoryParamResource):
# Lookup all the ancestor images for the image.
image_map = {}
for current_image in model.image.get_parent_images(namespace, repository, image):
image_map[str(current_image.id)] = current_image
image_map[current_image.id] = current_image
return historical_image_view(image, image_map)

View file

@ -1,13 +1,13 @@
import unittest
import time
from contextlib import contextmanager
from playhouse.test_utils import assert_query_count
from app import app, storage
from initdb import setup_database_for_testing, finished_database_for_testing
from data import model, database
from data.database import Image, ImageStorage, DerivedStorageForImage, Label, TagManifestLabel
from endpoints.v2.manifest import _generate_and_store_manifest
ADMIN_ACCESS_USER = 'devtable'
@ -16,48 +16,6 @@ PUBLIC_USER = 'public'
REPO = 'somerepo'
class assert_no_new_dangling_labels(object):
""" Specialized assertion for ensuring that GC cleans up all labels.
"""
def __init__(self):
self.existing_count = 0
def _get_dangling_count(self):
label_ids = set([current.id for current in Label.select()])
referenced_by_manifest = set([mlabel.label_id for mlabel in TagManifestLabel.select()])
return len(label_ids - referenced_by_manifest)
def __enter__(self):
self.existing_count = self._get_dangling_count()
return self
def __exit__(self, exc_type, exc_val, exc_tb):
updated_count = self._get_dangling_count()
assert updated_count == self.existing_count
class assert_no_new_dangling_storages(object):
""" Specialized assertion for ensuring that GC cleans up all dangling storages.
"""
def __init__(self):
self.existing_count = 0
def _get_dangling_count(self):
storage_ids = set([current.id for current in ImageStorage.select()])
referneced_by_image = set([image.storage_id for image in Image.select()])
referenced_by_derived = set([derived.derivative_id for derived in DerivedStorageForImage.select()])
return len(storage_ids - referneced_by_image - referenced_by_derived)
def __enter__(self):
self.existing_count = self._get_dangling_count()
return self
def __exit__(self, exc_type, exc_val, exc_tb):
updated_count = self._get_dangling_count()
assert updated_count == self.existing_count
class TestGarbageCollection(unittest.TestCase):
@staticmethod
def _set_tag_expiration_policy(namespace, expiration_s):
@ -78,7 +36,8 @@ class TestGarbageCollection(unittest.TestCase):
finished_database_for_testing(self)
self.ctx.__exit__(True, None, None)
def createImage(self, docker_image_id, repository_obj, username):
@staticmethod
def createImage(docker_image_id, repository_obj, username):
preferred = storage.preferred_locations[0]
image = model.image.find_create_or_link_image(docker_image_id, repository_obj, username, {},
preferred)
@ -91,10 +50,9 @@ class TestGarbageCollection(unittest.TestCase):
# Add some torrent info.
try:
database.TorrentInfo.get(storage=image.storage)
except database.TorrentInfo.DoesNotExist:
model.storage.save_torrent_info(image.storage, 1, 'helloworld')
model.storage.save_torrent_info(image.storage, 2, 'helloworlds!')
except:
pass
# Add some additional placements to the image.
for location_name in ['local_eu']:
@ -144,18 +102,17 @@ class TestGarbageCollection(unittest.TestCase):
return repo
def gcNow(self, repository):
model.repository.garbage_collect_repository(repository.namespace_user.username, repository.name)
self.assertTrue(model.repository.garbage_collect_repo(repository))
def deleteTag(self, repository, tag, perform_gc=True):
model.tag.delete_tag(repository.namespace_user.username, repository.name, tag)
if perform_gc:
model.repository.garbage_collect_repository(repository.namespace_user.username,
repository.name)
self.assertTrue(model.repository.garbage_collect_repo(repository))
def moveTag(self, repository, tag, docker_image_id):
model.tag.create_or_update_tag(repository.namespace_user.username, repository.name, tag,
docker_image_id)
model.repository.garbage_collect_repository(repository.namespace_user.username, repository.name)
docker_image_id)
self.assertTrue(model.repository.garbage_collect_repo(repository))
def assertNotDeleted(self, repository, *args):
for docker_image_id in args:
@ -173,19 +130,49 @@ class TestGarbageCollection(unittest.TestCase):
self.fail('Expected image %s to be deleted' % docker_image_id)
@staticmethod
def _get_dangling_storage_count():
storage_ids = set([current.id for current in ImageStorage.select()])
referenced_by_image = set([image.storage_id for image in Image.select()])
referenced_by_derived = set([derived.derivative_id
for derived in DerivedStorageForImage.select()])
return len(storage_ids - referenced_by_image - referenced_by_derived)
@staticmethod
def _get_dangling_label_count():
label_ids = set([current.id for current in Label.select()])
referenced_by_manifest = set([mlabel.label_id for mlabel in TagManifestLabel.select()])
return len(label_ids - referenced_by_manifest)
@contextmanager
def assert_no_new_dangling_storages_or_labels(self):
""" Specialized assertion for ensuring that GC cleans up all dangling storages
and labels.
"""
# TODO: Consider also asserting the number of DB queries being performed.
existing_storage_count = self._get_dangling_storage_count()
existing_label_count = self._get_dangling_label_count()
yield
updated_storage_count = self._get_dangling_storage_count()
self.assertEqual(updated_storage_count, existing_storage_count)
updated_label_count = self._get_dangling_label_count()
self.assertEqual(updated_label_count, existing_label_count)
def test_has_garbage(self):
""" Remove all existing repositories, then add one without garbage, check, then add one with
garbage, and check again.
"""
# Delete all existing repos.
for repo in database.Repository.select():
model.repository.purge_repository(repo.namespace_user.username, repo.name)
for repo in database.Repository.select().order_by(database.Repository.id):
self.assertTrue(model.repository.purge_repository(repo.namespace_user.username, repo.name))
# Change the time machine expiration on the namespace.
(database.User.update(removed_tag_expiration_s=1000000000)
.where(database.User.username == ADMIN_ACCESS_USER)
.execute())
(database.User
.update(removed_tag_expiration_s=1000000000)
.where(database.User.username == ADMIN_ACCESS_USER)
.execute())
# Create a repository without any garbage.
repository = self.createRepository(latest=['i1', 'i2', 'i3'])
@ -200,9 +187,10 @@ class TestGarbageCollection(unittest.TestCase):
self.assertIsNone(model.repository.find_repository_with_garbage(1000000000))
# Change the time machine expiration on the namespace.
(database.User.update(removed_tag_expiration_s=0)
.where(database.User.username == ADMIN_ACCESS_USER)
.execute())
(database.User
.update(removed_tag_expiration_s=0)
.where(database.User.username == ADMIN_ACCESS_USER)
.execute())
# Now we should find the repository for GC.
repository = model.repository.find_repository_with_garbage(0)
@ -210,191 +198,158 @@ class TestGarbageCollection(unittest.TestCase):
self.assertEquals(REPO, repository.name)
# GC the repository.
model.repository.garbage_collect_repository(repository.namespace_user.username, repository.name)
self.assertTrue(model.repository.garbage_collect_repo(repository))
# There should now be no repositories with garbage.
self.assertIsNone(model.repository.find_repository_with_garbage(0))
def test_find_garbage_policy_functions(self):
with assert_query_count(1):
one_policy = model.repository.get_random_gc_policy()
all_policies = model.repository._get_gc_expiration_policies()
self.assertIn(one_policy, all_policies)
def test_one_tag(self):
""" Create a repository with a single tag, then remove that tag and verify that the repository
is now empty. """
with assert_no_new_dangling_labels():
with assert_no_new_dangling_storages():
repository = self.createRepository(latest=['i1', 'i2', 'i3'])
self.deleteTag(repository, 'latest')
self.assertDeleted(repository, 'i1', 'i2', 'i3')
with self.assert_no_new_dangling_storages_or_labels():
repository = self.createRepository(latest=['i1', 'i2', 'i3'])
self.deleteTag(repository, 'latest')
self.assertDeleted(repository, 'i1', 'i2', 'i3')
def test_two_tags_unshared_images(self):
""" Repository has two tags with no shared images between them. """
with assert_no_new_dangling_labels():
with assert_no_new_dangling_storages():
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['f1', 'f2'])
self.deleteTag(repository, 'latest')
self.assertDeleted(repository, 'i1', 'i2', 'i3')
self.assertNotDeleted(repository, 'f1', 'f2')
with self.assert_no_new_dangling_storages_or_labels():
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['f1', 'f2'])
self.deleteTag(repository, 'latest')
self.assertDeleted(repository, 'i1', 'i2', 'i3')
self.assertNotDeleted(repository, 'f1', 'f2')
def test_two_tags_shared_images(self):
""" Repository has two tags with shared images. Deleting the tag should only remove the
unshared images.
"""
with assert_no_new_dangling_labels():
with assert_no_new_dangling_storages():
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1'])
self.deleteTag(repository, 'latest')
self.assertDeleted(repository, 'i2', 'i3')
self.assertNotDeleted(repository, 'i1', 'f1')
with self.assert_no_new_dangling_storages_or_labels():
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1'])
self.deleteTag(repository, 'latest')
self.assertDeleted(repository, 'i2', 'i3')
self.assertNotDeleted(repository, 'i1', 'f1')
def test_unrelated_repositories(self):
""" Two repositories with different images. Removing the tag from one leaves the other's
images intact.
"""
with assert_no_new_dangling_labels():
with assert_no_new_dangling_storages():
repository1 = self.createRepository(latest=['i1', 'i2', 'i3'], name='repo1')
repository2 = self.createRepository(latest=['j1', 'j2', 'j3'], name='repo2')
with self.assert_no_new_dangling_storages_or_labels():
repository1 = self.createRepository(latest=['i1', 'i2', 'i3'], name='repo1')
repository2 = self.createRepository(latest=['j1', 'j2', 'j3'], name='repo2')
self.deleteTag(repository1, 'latest')
self.assertDeleted(repository1, 'i1', 'i2', 'i3')
self.assertNotDeleted(repository2, 'j1', 'j2', 'j3')
self.deleteTag(repository1, 'latest')
self.assertDeleted(repository1, 'i1', 'i2', 'i3')
self.assertNotDeleted(repository2, 'j1', 'j2', 'j3')
def test_related_repositories(self):
""" Two repositories with shared images. Removing the tag from one leaves the other's
images intact.
"""
with assert_no_new_dangling_labels():
with assert_no_new_dangling_storages():
repository1 = self.createRepository(latest=['i1', 'i2', 'i3'], name='repo1')
repository2 = self.createRepository(latest=['i1', 'i2', 'j1'], name='repo2')
with self.assert_no_new_dangling_storages_or_labels():
repository1 = self.createRepository(latest=['i1', 'i2', 'i3'], name='repo1')
repository2 = self.createRepository(latest=['i1', 'i2', 'j1'], name='repo2')
self.deleteTag(repository1, 'latest')
self.assertDeleted(repository1, 'i3')
self.assertNotDeleted(repository2, 'i1', 'i2', 'j1')
self.deleteTag(repository1, 'latest')
self.assertDeleted(repository1, 'i3')
self.assertNotDeleted(repository2, 'i1', 'i2', 'j1')
def test_inaccessible_repositories(self):
""" Two repositories under different namespaces should result in the images being deleted
but not completely removed from the database.
"""
with assert_no_new_dangling_labels():
with assert_no_new_dangling_storages():
repository1 = self.createRepository(namespace=ADMIN_ACCESS_USER, latest=['i1', 'i2', 'i3'])
repository2 = self.createRepository(namespace=PUBLIC_USER, latest=['i1', 'i2', 'i3'])
self.deleteTag(repository1, 'latest')
self.assertDeleted(repository1, 'i1', 'i2', 'i3')
self.assertNotDeleted(repository2, 'i1', 'i2', 'i3')
with self.assert_no_new_dangling_storages_or_labels():
repository1 = self.createRepository(namespace=ADMIN_ACCESS_USER, latest=['i1', 'i2', 'i3'])
repository2 = self.createRepository(namespace=PUBLIC_USER, latest=['i1', 'i2', 'i3'])
self.deleteTag(repository1, 'latest')
self.assertDeleted(repository1, 'i1', 'i2', 'i3')
self.assertNotDeleted(repository2, 'i1', 'i2', 'i3')
def test_multiple_shared_images(self):
""" Repository has multiple tags with shared images. Selectively deleting the tags, and
verifying at each step.
"""
with assert_no_new_dangling_labels():
with assert_no_new_dangling_storages():
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'],
third=['t1', 't2', 't3'], fourth=['i1', 'f1'])
with self.assert_no_new_dangling_storages_or_labels():
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'],
third=['t1', 't2', 't3'], fourth=['i1', 'f1'])
# Delete tag other. Should delete f2, since it is not shared.
self.deleteTag(repository, 'other')
self.assertDeleted(repository, 'f2')
self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3', 'f1')
# Delete tag other. Should delete f2, since it is not shared.
self.deleteTag(repository, 'other')
self.assertDeleted(repository, 'f2')
self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3', 'f1')
# Move tag fourth to i3. This should remove f1 since it is no longer referenced.
self.moveTag(repository, 'fourth', 'i3')
self.assertDeleted(repository, 'f1')
self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3')
# Move tag fourth to i3. This should remove f1 since it is no longer referenced.
self.moveTag(repository, 'fourth', 'i3')
self.assertDeleted(repository, 'f1')
self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3')
# Delete tag 'latest'. This should do nothing since fourth is on the same branch.
self.deleteTag(repository, 'latest')
self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3')
# Delete tag 'latest'. This should do nothing since fourth is on the same branch.
self.deleteTag(repository, 'latest')
self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3')
# Delete tag 'third'. This should remove t1->t3.
self.deleteTag(repository, 'third')
self.assertDeleted(repository, 't1', 't2', 't3')
self.assertNotDeleted(repository, 'i1', 'i2', 'i3')
# Delete tag 'third'. This should remove t1->t3.
self.deleteTag(repository, 'third')
self.assertDeleted(repository, 't1', 't2', 't3')
self.assertNotDeleted(repository, 'i1', 'i2', 'i3')
# Add tag to i1.
self.moveTag(repository, 'newtag', 'i1')
self.assertNotDeleted(repository, 'i1', 'i2', 'i3')
# Add tag to i1.
self.moveTag(repository, 'newtag', 'i1')
self.assertNotDeleted(repository, 'i1', 'i2', 'i3')
# Delete tag 'fourth'. This should remove i2 and i3.
self.deleteTag(repository, 'fourth')
self.assertDeleted(repository, 'i2', 'i3')
self.assertNotDeleted(repository, 'i1')
# Delete tag 'newtag'. This should remove the remaining image.
self.deleteTag(repository, 'newtag')
self.assertDeleted(repository, 'i1')
# Delete tag 'fourth'. This should remove i2 and i3.
self.deleteTag(repository, 'fourth')
self.assertDeleted(repository, 'i2', 'i3')
self.assertNotDeleted(repository, 'i1')
# Delete tag 'newtag'. This should remove the remaining image.
self.deleteTag(repository, 'newtag')
self.assertDeleted(repository, 'i1')
def test_empty_gc(self):
with assert_no_new_dangling_labels():
with assert_no_new_dangling_storages():
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'],
third=['t1', 't2', 't3'], fourth=['i1', 'f1'])
self.gcNow(repository)
self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3', 'f1', 'f2')
with self.assert_no_new_dangling_storages_or_labels():
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'],
third=['t1', 't2', 't3'], fourth=['i1', 'f1'])
self.gcNow(repository)
self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3', 'f1', 'f2')
def test_time_machine_no_gc(self):
""" Repository has two tags with shared images. Deleting the tag should not remove any images
"""
with assert_no_new_dangling_labels():
with assert_no_new_dangling_storages():
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1'])
self._set_tag_expiration_policy(repository.namespace_user.username, 60*60*24)
self.deleteTag(repository, 'latest')
self.assertNotDeleted(repository, 'i2', 'i3')
self.assertNotDeleted(repository, 'i1', 'f1')
with self.assert_no_new_dangling_storages_or_labels():
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1'])
self._set_tag_expiration_policy(repository.namespace_user.username, 60*60*24)
self.deleteTag(repository, 'latest')
self.assertNotDeleted(repository, 'i2', 'i3')
self.assertNotDeleted(repository, 'i1', 'f1')
def test_time_machine_gc(self):
""" Repository has two tags with shared images. Deleting the second tag should cause the images
for the first deleted tag to gc.
"""
with assert_no_new_dangling_labels():
with assert_no_new_dangling_storages():
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1'])
with self.assert_no_new_dangling_storages_or_labels():
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1'])
self._set_tag_expiration_policy(repository.namespace_user.username, 1)
self._set_tag_expiration_policy(repository.namespace_user.username, 1)
self.deleteTag(repository, 'latest')
self.assertNotDeleted(repository, 'i2', 'i3')
self.assertNotDeleted(repository, 'i1', 'f1')
self.deleteTag(repository, 'latest')
self.assertNotDeleted(repository, 'i2', 'i3')
self.assertNotDeleted(repository, 'i1', 'f1')
time.sleep(2)
time.sleep(2)
self.deleteTag(repository, 'other') # This will cause the images associated with latest to gc
self.assertDeleted(repository, 'i2', 'i3')
self.assertNotDeleted(repository, 'i1', 'f1')
def test_manifest_gc(self):
with assert_no_new_dangling_labels():
with assert_no_new_dangling_storages():
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1'])
_generate_and_store_manifest(ADMIN_ACCESS_USER, REPO, 'latest')
self._set_tag_expiration_policy(repository.namespace_user.username, 0)
self.deleteTag(repository, 'latest')
self.assertDeleted(repository, 'i2', 'i3')
self.deleteTag(repository, 'other') # This will cause the images associated with latest to gc
self.assertDeleted(repository, 'i2', 'i3')
self.assertNotDeleted(repository, 'i1', 'f1')
if __name__ == '__main__':

View file

@ -20,7 +20,8 @@ class TestImageTree(unittest.TestCase):
finished_database_for_testing(self)
self.ctx.__exit__(True, None, None)
def _get_base_image(self, all_images):
@staticmethod
def _get_base_image(all_images):
for image in all_images:
if image.ancestors == '/':
return image
@ -90,29 +91,27 @@ class TestImageTree(unittest.TestCase):
# still return the tag that contains them.
self.assertEquals('staging', tree.tag_containing_image(result[0]))
def test_longest_path_simple_repo_direct_lookup(self):
repository = model.repository.get_repository(NAMESPACE, SIMPLE_REPO)
all_images = list(model.image.get_repository_images(NAMESPACE, SIMPLE_REPO))
all_tags = list(model.tag.list_repository_tags(NAMESPACE, SIMPLE_REPO))
base_image = self._get_base_image(all_images)
tag_image = all_tags[0].image
def checker(index, image):
return True
filtered_images = model.image.get_repository_images_without_placements(repository,
with_ancestor=base_image)
filtered_images = model.image.get_repository_images_without_placements(
repository,
with_ancestor=base_image)
self.assertEquals(set([f.id for f in filtered_images]), set([a.id for a in all_images]))
tree = ImageTree(filtered_images, all_tags)
ancestors = tag_image.ancestors.split('/')[2:-1] # Skip the first image.
result = tree.find_longest_path(base_image.id, checker)
self.assertEquals(3, len(result))
self.assertEquals('latest', tree.tag_containing_image(result[-1]))
if __name__ == '__main__':
unittest.main()

View file

@ -36,7 +36,8 @@ class TestManifests(unittest.TestCase):
def _perform_cleanup(self):
database.RepositoryTag.delete().where(database.RepositoryTag.hidden == True).execute()
model.repository.garbage_collect_repository(ADMIN_ACCESS_USER, REPO)
repo_object = model.repository.get_repository(ADMIN_ACCESS_USER, REPO)
model.repository.garbage_collect_repo(repo_object)
def test_missing_link(self):
""" Tests for a corner case that could result in missing a link to a blob referenced by a

View file

@ -1,3 +1,6 @@
from collections import defaultdict
class ImageTreeNode(object):
""" A node in the image tree. """
def __init__(self, image, child_map):
@ -9,7 +12,7 @@ class ImageTreeNode(object):
@property
def children(self):
return self._child_map.get(str(self.image.id), [])
return self._child_map[self.image.id]
def add_tag(self, tag):
self.tags.append(tag)
@ -20,18 +23,18 @@ class ImageTree(object):
def __init__(self, all_images, all_tags, base_filter=None):
self._image_map = {}
self._child_map = {}
self._child_map = defaultdict(list)
self._build(all_images, all_tags, base_filter)
def _build(self, all_images, all_tags, base_filter=None):
def _build(self, all_images, all_tags, base_filter=None):
# Build nodes for each of the images.
for image in all_images:
ancestors = image.ancestors.split('/')[1:-1]
ancestors = image.ancestor_id_list()
# Filter any unneeded images.
if base_filter is not None:
if image.id != base_filter and not str(base_filter) in ancestors:
if image.id != base_filter and not base_filter in ancestors:
continue
# Create the node for the image.
@ -39,11 +42,8 @@ class ImageTree(object):
self._image_map[image.id] = image_node
# Add the node to the child map for its parent image (if any).
parent_image_id = image.ancestors.split('/')[-2] if image.ancestors else None
if parent_image_id:
if not parent_image_id in self._child_map:
self._child_map[parent_image_id] = []
parent_image_id = image.parent_id
if parent_image_id is not None:
self._child_map[parent_image_id].append(image_node)
# Build the tag map.
@ -54,7 +54,6 @@ class ImageTree(object):
image_node.add_tag(tag.name)
def find_longest_path(self, image_id, checker):
""" Returns a list of images representing the longest path that matches the given
checker function, starting from the given image_id *exclusive*.
@ -65,7 +64,6 @@ class ImageTree(object):
return self._find_longest_path(start_node, checker, -1)[1:]
def _find_longest_path(self, image_node, checker, index):
found_path = []
@ -79,7 +77,6 @@ class ImageTree(object):
return [image_node.image] + found_path
def tag_containing_image(self, image):
""" Returns the name of the closest tag containing the given image. """
if not image:
@ -99,4 +96,4 @@ class ImageTree(object):
if found is not None:
return found
return None
return None

View file

@ -35,7 +35,7 @@ def backfill_aggregate_sizes():
aggregate_size = image.storage.image_size
image_ids = image.ancestors.split('/')[1:-1]
image_ids = image.ancestor_id_list()
for image_id in image_ids:
to_add = db_for_update(Image
.select(Image, ImageStorage)