commit
89ab94ffbb
6 changed files with 695 additions and 639 deletions
|
@ -54,9 +54,13 @@ def get_public_repo_visibility():
|
|||
return Visibility.get(name='public')
|
||||
|
||||
|
||||
@lru_cache(maxsize=3)
|
||||
def _lookup_team_role(name):
|
||||
return TeamRole.get(name=name)
|
||||
return _lookup_team_roles()[name]
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
|
||||
def _lookup_team_roles():
|
||||
return {role.name:role for role in TeamRole.select()}
|
||||
|
||||
|
||||
def filter_to_repos_for_user(query, username=None, namespace=None, repo_kind='image',
|
||||
|
|
|
@ -181,49 +181,59 @@ def garbage_collect_repo(repo, extra_candidate_set=None):
|
|||
logger.debug('No candidate images for GC for repo: %s', repo.id)
|
||||
return True
|
||||
|
||||
candidates_orphans = list(candidate_orphan_image_set)
|
||||
all_images_removed = set()
|
||||
all_storage_id_whitelist = set()
|
||||
all_unreferenced_candidates = set()
|
||||
|
||||
with db_transaction():
|
||||
Candidate = Image.alias()
|
||||
Tagged = Image.alias()
|
||||
ancestor_superset = Tagged.ancestors ** db_concat_func(Candidate.ancestors, Candidate.id, '/%')
|
||||
# Remove any images directly referenced by tags, to prune the working set.
|
||||
direct_referenced = (RepositoryTag
|
||||
.select(RepositoryTag.image)
|
||||
.where(RepositoryTag.repository == repo.id,
|
||||
RepositoryTag.image << list(candidate_orphan_image_set)))
|
||||
candidate_orphan_image_set.difference_update([t.image_id for t in direct_referenced])
|
||||
|
||||
# We are going to compute all images which are being referenced in two ways:
|
||||
# First, we will find all images which have their ancestor paths appear in
|
||||
# another image. Secondly, we union in all of the candidate images which are
|
||||
# directly referenced by a tag. This can be used in a subquery to directly
|
||||
# find which candidates are being referenced without any client side
|
||||
# computation or extra round trips.
|
||||
direct_referenced = (RepositoryTag
|
||||
.select(RepositoryTag.image)
|
||||
.where(RepositoryTag.repository == repo.id,
|
||||
RepositoryTag.image << candidates_orphans))
|
||||
# Iteratively try to remove images from the database. The only images we can remove are those
|
||||
# that are not referenced by tags AND not the parents of other images. We continue removing images
|
||||
# until no changes are found.
|
||||
iteration = 0
|
||||
making_progress = True
|
||||
while candidate_orphan_image_set and making_progress:
|
||||
iteration = iteration + 1
|
||||
logger.debug('Starting iteration #%s for GC of repository %s with candidates: %s', iteration,
|
||||
repo.id, candidate_orphan_image_set)
|
||||
candidates_orphans = list(candidate_orphan_image_set)
|
||||
|
||||
cloned = direct_referenced.clone().alias('direct_ref')
|
||||
directly_referenced_subquery = Image.alias().select(cloned.c.image_id).from_(cloned)
|
||||
|
||||
ancestor_referenced = (Candidate
|
||||
.select(Candidate.id)
|
||||
.join(Tagged, on=ancestor_superset)
|
||||
.join(RepositoryTag, on=(Tagged.id == RepositoryTag.image))
|
||||
with db_transaction():
|
||||
# Any image directly referenced by a tag that still exists, cannot be GCed.
|
||||
direct_referenced = (RepositoryTag
|
||||
.select(RepositoryTag.image)
|
||||
.where(RepositoryTag.repository == repo.id,
|
||||
Candidate.id << candidates_orphans,
|
||||
~(Candidate.id << directly_referenced_subquery)))
|
||||
RepositoryTag.image << candidates_orphans))
|
||||
|
||||
referenced_candidates = (direct_referenced | ancestor_referenced)
|
||||
# Any image which is the parent of another image, cannot be GCed.
|
||||
parent_referenced = (Image
|
||||
.select(Image.parent)
|
||||
.where(Image.repository == repo.id,
|
||||
Image.parent << candidates_orphans))
|
||||
|
||||
# We desire a few pieces of information from the database from the following
|
||||
# query: all of the image ids which are associated with this repository,
|
||||
# and the storages which are associated with those images.
|
||||
unreferenced_candidates = (Image
|
||||
.select(Image.id, Image.docker_image_id,
|
||||
ImageStorage.id, ImageStorage.uuid)
|
||||
.join(ImageStorage)
|
||||
.where(Image.id << candidates_orphans,
|
||||
~(Image.id << referenced_candidates)))
|
||||
referenced_candidates = (direct_referenced | parent_referenced)
|
||||
|
||||
# We desire a few pieces of information from the database from the following
|
||||
# query: all of the image ids which are associated with this repository,
|
||||
# and the storages which are associated with those images.
|
||||
unreferenced_candidates = (Image
|
||||
.select(Image.id, Image.docker_image_id,
|
||||
ImageStorage.id, ImageStorage.uuid)
|
||||
.join(ImageStorage)
|
||||
.where(Image.id << candidates_orphans,
|
||||
~(Image.id << referenced_candidates)))
|
||||
|
||||
image_ids_to_remove = [candidate.id for candidate in unreferenced_candidates]
|
||||
making_progress = bool(len(image_ids_to_remove))
|
||||
if len(image_ids_to_remove) == 0:
|
||||
# No more candidates to remove.
|
||||
break
|
||||
|
||||
image_ids_to_remove = [candidate.id for candidate in unreferenced_candidates]
|
||||
if len(image_ids_to_remove) > 0:
|
||||
logger.info('Cleaning up unreferenced images: %s', image_ids_to_remove)
|
||||
storage_id_whitelist = set([candidate.storage_id for candidate in unreferenced_candidates])
|
||||
|
||||
|
@ -253,15 +263,22 @@ def garbage_collect_repo(repo, extra_candidate_set=None):
|
|||
logger.info('Could not GC images %s; will try again soon', image_ids_to_remove)
|
||||
return False
|
||||
|
||||
# Add the images to the removed set and remove them from the candidate set.
|
||||
all_images_removed.update(image_ids_to_remove)
|
||||
all_storage_id_whitelist.update(storage_id_whitelist)
|
||||
all_unreferenced_candidates.update(unreferenced_candidates)
|
||||
|
||||
candidate_orphan_image_set.difference_update(image_ids_to_remove)
|
||||
|
||||
# If any images were removed, GC any orphaned storages.
|
||||
if len(image_ids_to_remove) > 0:
|
||||
logger.info('Garbage collecting storage for images: %s', image_ids_to_remove)
|
||||
storage_ids_removed = set(storage.garbage_collect_storage(storage_id_whitelist))
|
||||
if len(all_images_removed) > 0:
|
||||
logger.info('Garbage collecting storage for images: %s', all_images_removed)
|
||||
storage_ids_removed = set(storage.garbage_collect_storage(all_storage_id_whitelist))
|
||||
|
||||
# If any storages were removed and cleanup callbacks are registered, call them with
|
||||
# the images+storages removed.
|
||||
if storage_ids_removed and config.image_cleanup_callbacks:
|
||||
image_storages_removed = [candidate for candidate in unreferenced_candidates
|
||||
image_storages_removed = [candidate for candidate in all_unreferenced_candidates
|
||||
if candidate.storage_id in storage_ids_removed]
|
||||
for callback in config.image_cleanup_callbacks:
|
||||
callback(image_storages_removed)
|
||||
|
|
615
data/model/test/test_gc.py
Normal file
615
data/model/test/test_gc.py
Normal file
|
@ -0,0 +1,615 @@
|
|||
import hashlib
|
||||
import pytest
|
||||
import time
|
||||
|
||||
from mock import patch
|
||||
|
||||
from app import storage
|
||||
from contextlib import contextmanager
|
||||
from playhouse.test_utils import assert_query_count
|
||||
|
||||
from data import model, database
|
||||
from data.database import Image, ImageStorage, DerivedStorageForImage, Label, TagManifestLabel, Blob
|
||||
from test.fixtures import *
|
||||
|
||||
|
||||
ADMIN_ACCESS_USER = 'devtable'
|
||||
PUBLIC_USER = 'public'
|
||||
|
||||
REPO = 'somerepo'
|
||||
|
||||
def _set_tag_expiration_policy(namespace, expiration_s):
|
||||
namespace_user = model.user.get_user(namespace)
|
||||
model.user.change_user_tag_expiration(namespace_user, expiration_s)
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def default_tag_policy(initialized_db):
|
||||
_set_tag_expiration_policy(ADMIN_ACCESS_USER, 0)
|
||||
_set_tag_expiration_policy(PUBLIC_USER, 0)
|
||||
|
||||
|
||||
def create_image(docker_image_id, repository_obj, username):
|
||||
preferred = storage.preferred_locations[0]
|
||||
image = model.image.find_create_or_link_image(docker_image_id, repository_obj, username, {},
|
||||
preferred)
|
||||
image.storage.uploading = False
|
||||
image.storage.save()
|
||||
|
||||
# Create derived images as well.
|
||||
model.image.find_or_create_derived_storage(image, 'squash', preferred)
|
||||
model.image.find_or_create_derived_storage(image, 'aci', preferred)
|
||||
|
||||
# Add some torrent info.
|
||||
try:
|
||||
database.TorrentInfo.get(storage=image.storage)
|
||||
except database.TorrentInfo.DoesNotExist:
|
||||
model.storage.save_torrent_info(image.storage, 1, 'helloworld')
|
||||
|
||||
# Add some additional placements to the image.
|
||||
for location_name in ['local_eu']:
|
||||
location = database.ImageStorageLocation.get(name=location_name)
|
||||
|
||||
try:
|
||||
database.ImageStoragePlacement.get(location=location, storage=image.storage)
|
||||
except:
|
||||
continue
|
||||
|
||||
database.ImageStoragePlacement.create(location=location, storage=image.storage)
|
||||
|
||||
return image.storage
|
||||
|
||||
|
||||
def create_repository(namespace=ADMIN_ACCESS_USER, name=REPO, **kwargs):
|
||||
user = model.user.get_user(namespace)
|
||||
repo = model.repository.create_repository(namespace, name, user)
|
||||
|
||||
# Populate the repository with the tags.
|
||||
image_map = {}
|
||||
for tag_name in kwargs:
|
||||
image_ids = kwargs[tag_name]
|
||||
parent = None
|
||||
|
||||
for image_id in image_ids:
|
||||
if not image_id in image_map:
|
||||
image_map[image_id] = create_image(image_id, repo, namespace)
|
||||
|
||||
v1_metadata = {
|
||||
'id': image_id,
|
||||
}
|
||||
if parent is not None:
|
||||
v1_metadata['parent'] = parent.docker_image_id
|
||||
|
||||
# Set the ancestors for the image.
|
||||
parent = model.image.set_image_metadata(image_id, namespace, name, '', '', '', v1_metadata,
|
||||
parent=parent)
|
||||
|
||||
# Set the tag for the image.
|
||||
tag_manifest, _ = model.tag.store_tag_manifest(namespace, name, tag_name, image_ids[-1],
|
||||
'sha:someshahere', '{}')
|
||||
|
||||
# Add some labels to the tag.
|
||||
model.label.create_manifest_label(tag_manifest, 'foo', 'bar', 'manifest')
|
||||
model.label.create_manifest_label(tag_manifest, 'meh', 'grah', 'manifest')
|
||||
|
||||
return repo
|
||||
|
||||
|
||||
def gc_now(repository):
|
||||
assert model.repository.garbage_collect_repo(repository)
|
||||
|
||||
|
||||
def delete_tag(repository, tag, perform_gc=True):
|
||||
model.tag.delete_tag(repository.namespace_user.username, repository.name, tag)
|
||||
if perform_gc:
|
||||
assert model.repository.garbage_collect_repo(repository)
|
||||
|
||||
|
||||
def move_tag(repository, tag, docker_image_id):
|
||||
model.tag.create_or_update_tag(repository.namespace_user.username, repository.name, tag,
|
||||
docker_image_id)
|
||||
assert model.repository.garbage_collect_repo(repository)
|
||||
|
||||
|
||||
def assert_not_deleted(repository, *args):
|
||||
for docker_image_id in args:
|
||||
assert model.image.get_image_by_id(repository.namespace_user.username, repository.name,
|
||||
docker_image_id)
|
||||
|
||||
|
||||
def assert_deleted(repository, *args):
|
||||
for docker_image_id in args:
|
||||
try:
|
||||
# Verify the image is missing when accessed by the repository.
|
||||
model.image.get_image_by_id(repository.namespace_user.username, repository.name,
|
||||
docker_image_id)
|
||||
except model.DataModelException:
|
||||
return
|
||||
|
||||
assert False, 'Expected image %s to be deleted' % docker_image_id
|
||||
|
||||
|
||||
def _get_dangling_storage_count():
|
||||
storage_ids = set([current.id for current in ImageStorage.select()])
|
||||
referenced_by_image = set([image.storage_id for image in Image.select()])
|
||||
referenced_by_derived = set([derived.derivative_id
|
||||
for derived in DerivedStorageForImage.select()])
|
||||
|
||||
return len(storage_ids - referenced_by_image - referenced_by_derived)
|
||||
|
||||
|
||||
def _get_dangling_label_count():
|
||||
label_ids = set([current.id for current in Label.select()])
|
||||
referenced_by_manifest = set([mlabel.label_id for mlabel in TagManifestLabel.select()])
|
||||
return len(label_ids - referenced_by_manifest)
|
||||
|
||||
|
||||
@contextmanager
|
||||
def assert_gc_integrity(expect_storage_removed=True):
|
||||
""" Specialized assertion for ensuring that GC cleans up all dangling storages
|
||||
and labels, invokes the callback for images removed and doesn't invoke the
|
||||
callback for images *not* removed.
|
||||
"""
|
||||
# Add a callback for when images are removed.
|
||||
removed_image_storages = []
|
||||
model.config.register_image_cleanup_callback(removed_image_storages.extend)
|
||||
|
||||
# Store the number of dangling storages and labels.
|
||||
existing_storage_count = _get_dangling_storage_count()
|
||||
existing_label_count = _get_dangling_label_count()
|
||||
yield
|
||||
|
||||
# Ensure the number of dangling storages and labels has not changed.
|
||||
updated_storage_count = _get_dangling_storage_count()
|
||||
assert updated_storage_count == existing_storage_count
|
||||
|
||||
updated_label_count = _get_dangling_label_count()
|
||||
assert updated_label_count == existing_label_count
|
||||
|
||||
# Ensure that for each call to the image+storage cleanup callback, the image and its
|
||||
# storage is not found *anywhere* in the database.
|
||||
for removed_image_and_storage in removed_image_storages:
|
||||
with pytest.raises(Image.DoesNotExist):
|
||||
Image.get(id=removed_image_and_storage.id)
|
||||
|
||||
with pytest.raises(ImageStorage.DoesNotExist):
|
||||
ImageStorage.get(id=removed_image_and_storage.storage_id)
|
||||
|
||||
with pytest.raises(ImageStorage.DoesNotExist):
|
||||
ImageStorage.get(uuid=removed_image_and_storage.storage.uuid)
|
||||
|
||||
assert expect_storage_removed == bool(removed_image_storages)
|
||||
|
||||
# Ensure all CAS storage is in the storage engine.
|
||||
preferred = storage.preferred_locations[0]
|
||||
for storage_row in ImageStorage.select():
|
||||
if storage_row.cas_path:
|
||||
storage.get_content({preferred}, storage.blob_path(storage_row.content_checksum))
|
||||
|
||||
for blob_row in Blob.select():
|
||||
storage.get_content({preferred}, storage.blob_path(blob_row.digest))
|
||||
|
||||
|
||||
def test_has_garbage(default_tag_policy, initialized_db):
|
||||
""" Remove all existing repositories, then add one without garbage, check, then add one with
|
||||
garbage, and check again.
|
||||
"""
|
||||
# Delete all existing repos.
|
||||
for repo in database.Repository.select().order_by(database.Repository.id):
|
||||
assert model.repository.purge_repository(repo.namespace_user.username, repo.name)
|
||||
|
||||
# Change the time machine expiration on the namespace.
|
||||
(database.User
|
||||
.update(removed_tag_expiration_s=1000000000)
|
||||
.where(database.User.username == ADMIN_ACCESS_USER)
|
||||
.execute())
|
||||
|
||||
# Create a repository without any garbage.
|
||||
repository = create_repository(latest=['i1', 'i2', 'i3'])
|
||||
|
||||
# Ensure that no repositories are returned by the has garbage check.
|
||||
assert model.repository.find_repository_with_garbage(1000000000) is None
|
||||
|
||||
# Delete a tag.
|
||||
delete_tag(repository, 'latest', perform_gc=False)
|
||||
|
||||
# There should still not be any repositories with garbage, due to time machine.
|
||||
assert model.repository.find_repository_with_garbage(1000000000) is None
|
||||
|
||||
# Change the time machine expiration on the namespace.
|
||||
(database.User
|
||||
.update(removed_tag_expiration_s=0)
|
||||
.where(database.User.username == ADMIN_ACCESS_USER)
|
||||
.execute())
|
||||
|
||||
# Now we should find the repository for GC.
|
||||
repository = model.repository.find_repository_with_garbage(0)
|
||||
assert repository is not None
|
||||
assert repository.name == REPO
|
||||
|
||||
# GC the repository.
|
||||
assert model.repository.garbage_collect_repo(repository)
|
||||
|
||||
# There should now be no repositories with garbage.
|
||||
assert model.repository.find_repository_with_garbage(0) is None
|
||||
|
||||
|
||||
def test_find_garbage_policy_functions(default_tag_policy, initialized_db):
|
||||
with assert_query_count(1):
|
||||
one_policy = model.repository.get_random_gc_policy()
|
||||
all_policies = model.repository._get_gc_expiration_policies()
|
||||
assert one_policy in all_policies
|
||||
|
||||
|
||||
def test_one_tag(default_tag_policy, initialized_db):
|
||||
""" Create a repository with a single tag, then remove that tag and verify that the repository
|
||||
is now empty. """
|
||||
with assert_gc_integrity():
|
||||
repository = create_repository(latest=['i1', 'i2', 'i3'])
|
||||
delete_tag(repository, 'latest')
|
||||
assert_deleted(repository, 'i1', 'i2', 'i3')
|
||||
|
||||
|
||||
def test_two_tags_unshared_images(default_tag_policy, initialized_db):
|
||||
""" Repository has two tags with no shared images between them. """
|
||||
with assert_gc_integrity():
|
||||
repository = create_repository(latest=['i1', 'i2', 'i3'], other=['f1', 'f2'])
|
||||
delete_tag(repository, 'latest')
|
||||
assert_deleted(repository, 'i1', 'i2', 'i3')
|
||||
assert_not_deleted(repository, 'f1', 'f2')
|
||||
|
||||
|
||||
def test_two_tags_shared_images(default_tag_policy, initialized_db):
|
||||
""" Repository has two tags with shared images. Deleting the tag should only remove the
|
||||
unshared images.
|
||||
"""
|
||||
with assert_gc_integrity():
|
||||
repository = create_repository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1'])
|
||||
delete_tag(repository, 'latest')
|
||||
assert_deleted(repository, 'i2', 'i3')
|
||||
assert_not_deleted(repository, 'i1', 'f1')
|
||||
|
||||
|
||||
def test_unrelated_repositories(default_tag_policy, initialized_db):
|
||||
""" Two repositories with different images. Removing the tag from one leaves the other's
|
||||
images intact.
|
||||
"""
|
||||
with assert_gc_integrity():
|
||||
repository1 = create_repository(latest=['i1', 'i2', 'i3'], name='repo1')
|
||||
repository2 = create_repository(latest=['j1', 'j2', 'j3'], name='repo2')
|
||||
|
||||
delete_tag(repository1, 'latest')
|
||||
|
||||
assert_deleted(repository1, 'i1', 'i2', 'i3')
|
||||
assert_not_deleted(repository2, 'j1', 'j2', 'j3')
|
||||
|
||||
|
||||
def test_related_repositories(default_tag_policy, initialized_db):
|
||||
""" Two repositories with shared images. Removing the tag from one leaves the other's
|
||||
images intact.
|
||||
"""
|
||||
with assert_gc_integrity():
|
||||
repository1 = create_repository(latest=['i1', 'i2', 'i3'], name='repo1')
|
||||
repository2 = create_repository(latest=['i1', 'i2', 'j1'], name='repo2')
|
||||
|
||||
delete_tag(repository1, 'latest')
|
||||
|
||||
assert_deleted(repository1, 'i3')
|
||||
assert_not_deleted(repository2, 'i1', 'i2', 'j1')
|
||||
|
||||
|
||||
def test_inaccessible_repositories(default_tag_policy, initialized_db):
|
||||
""" Two repositories under different namespaces should result in the images being deleted
|
||||
but not completely removed from the database.
|
||||
"""
|
||||
with assert_gc_integrity():
|
||||
repository1 = create_repository(namespace=ADMIN_ACCESS_USER, latest=['i1', 'i2', 'i3'])
|
||||
repository2 = create_repository(namespace=PUBLIC_USER, latest=['i1', 'i2', 'i3'])
|
||||
|
||||
delete_tag(repository1, 'latest')
|
||||
assert_deleted(repository1, 'i1', 'i2', 'i3')
|
||||
assert_not_deleted(repository2, 'i1', 'i2', 'i3')
|
||||
|
||||
|
||||
|
||||
def test_many_multiple_shared_images(default_tag_policy, initialized_db):
|
||||
""" Repository has multiple tags with shared images. Delete all but one tag.
|
||||
"""
|
||||
with assert_gc_integrity():
|
||||
repository = create_repository(latest=['i1', 'i2', 'i3', 'i4', 'i5', 'i6', 'i7', 'i8', 'j0'],
|
||||
master=['i1', 'i2', 'i3', 'i4', 'i5', 'i6', 'i7', 'i8', 'j1'])
|
||||
|
||||
# Delete tag latest. Should only delete j0, since it is not shared.
|
||||
delete_tag(repository, 'latest')
|
||||
|
||||
assert_deleted(repository, 'j0')
|
||||
assert_not_deleted(repository, 'i1', 'i2', 'i3', 'i4', 'i5', 'i6', 'i7', 'i8', 'j1')
|
||||
|
||||
# Delete tag master. Should delete the rest of the images.
|
||||
delete_tag(repository, 'master')
|
||||
|
||||
assert_deleted(repository, 'i1', 'i2', 'i3', 'i4', 'i5', 'i6', 'i7', 'i8', 'j1')
|
||||
|
||||
|
||||
def test_multiple_shared_images(default_tag_policy, initialized_db):
|
||||
""" Repository has multiple tags with shared images. Selectively deleting the tags, and
|
||||
verifying at each step.
|
||||
"""
|
||||
with assert_gc_integrity():
|
||||
repository = create_repository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'],
|
||||
third=['t1', 't2', 't3'], fourth=['i1', 'f1'])
|
||||
|
||||
# Current state:
|
||||
# latest -> i3->i2->i1
|
||||
# other -> f2->f1->i1
|
||||
# third -> t3->t2->t1
|
||||
# fourth -> f1->i1
|
||||
|
||||
# Delete tag other. Should delete f2, since it is not shared.
|
||||
delete_tag(repository, 'other')
|
||||
assert_deleted(repository, 'f2')
|
||||
assert_not_deleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3', 'f1')
|
||||
|
||||
# Current state:
|
||||
# latest -> i3->i2->i1
|
||||
# third -> t3->t2->t1
|
||||
# fourth -> f1->i1
|
||||
|
||||
# Move tag fourth to i3. This should remove f1 since it is no longer referenced.
|
||||
move_tag(repository, 'fourth', 'i3')
|
||||
assert_deleted(repository, 'f1')
|
||||
assert_not_deleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3')
|
||||
|
||||
# Current state:
|
||||
# latest -> i3->i2->i1
|
||||
# third -> t3->t2->t1
|
||||
# fourth -> i3->i2->i1
|
||||
|
||||
# Delete tag 'latest'. This should do nothing since fourth is on the same branch.
|
||||
delete_tag(repository, 'latest')
|
||||
assert_not_deleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3')
|
||||
|
||||
# Current state:
|
||||
# third -> t3->t2->t1
|
||||
# fourth -> i3->i2->i1
|
||||
|
||||
# Delete tag 'third'. This should remove t1->t3.
|
||||
delete_tag(repository, 'third')
|
||||
assert_deleted(repository, 't1', 't2', 't3')
|
||||
assert_not_deleted(repository, 'i1', 'i2', 'i3')
|
||||
|
||||
# Current state:
|
||||
# fourth -> i3->i2->i1
|
||||
|
||||
# Add tag to i1.
|
||||
move_tag(repository, 'newtag', 'i1')
|
||||
assert_not_deleted(repository, 'i1', 'i2', 'i3')
|
||||
|
||||
# Current state:
|
||||
# fourth -> i3->i2->i1
|
||||
# newtag -> i1
|
||||
|
||||
# Delete tag 'fourth'. This should remove i2 and i3.
|
||||
delete_tag(repository, 'fourth')
|
||||
assert_deleted(repository, 'i2', 'i3')
|
||||
assert_not_deleted(repository, 'i1')
|
||||
|
||||
# Current state:
|
||||
# newtag -> i1
|
||||
|
||||
# Delete tag 'newtag'. This should remove the remaining image.
|
||||
delete_tag(repository, 'newtag')
|
||||
assert_deleted(repository, 'i1')
|
||||
|
||||
# Current state:
|
||||
# (Empty)
|
||||
|
||||
|
||||
def test_empty_gc(default_tag_policy, initialized_db):
|
||||
with assert_gc_integrity(expect_storage_removed=False):
|
||||
repository = create_repository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'],
|
||||
third=['t1', 't2', 't3'], fourth=['i1', 'f1'])
|
||||
|
||||
gc_now(repository)
|
||||
assert_not_deleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3', 'f1', 'f2')
|
||||
|
||||
|
||||
def test_time_machine_no_gc(default_tag_policy, initialized_db):
|
||||
""" Repository has two tags with shared images. Deleting the tag should not remove any images
|
||||
"""
|
||||
with assert_gc_integrity(expect_storage_removed=False):
|
||||
repository = create_repository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1'])
|
||||
_set_tag_expiration_policy(repository.namespace_user.username, 60*60*24)
|
||||
|
||||
delete_tag(repository, 'latest')
|
||||
assert_not_deleted(repository, 'i2', 'i3')
|
||||
assert_not_deleted(repository, 'i1', 'f1')
|
||||
|
||||
|
||||
def test_time_machine_gc(default_tag_policy, initialized_db):
|
||||
""" Repository has two tags with shared images. Deleting the second tag should cause the images
|
||||
for the first deleted tag to gc.
|
||||
"""
|
||||
with assert_gc_integrity():
|
||||
repository = create_repository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1'])
|
||||
|
||||
_set_tag_expiration_policy(repository.namespace_user.username, 1)
|
||||
|
||||
delete_tag(repository, 'latest')
|
||||
assert_not_deleted(repository, 'i2', 'i3')
|
||||
assert_not_deleted(repository, 'i1', 'f1')
|
||||
|
||||
time.sleep(2)
|
||||
|
||||
# This will cause the images associated with latest to gc
|
||||
delete_tag(repository, 'other')
|
||||
assert_deleted(repository, 'i2', 'i3')
|
||||
assert_not_deleted(repository, 'i1', 'f1')
|
||||
|
||||
|
||||
def test_images_shared_storage(default_tag_policy, initialized_db):
|
||||
""" Repository with two tags, both with the same shared storage. Deleting the first
|
||||
tag should delete the first image, but *not* its storage.
|
||||
"""
|
||||
with assert_gc_integrity(expect_storage_removed=False):
|
||||
repository = create_repository()
|
||||
|
||||
# Add two tags, each with their own image, but with the same storage.
|
||||
image_storage = model.storage.create_v1_storage(storage.preferred_locations[0])
|
||||
|
||||
first_image = Image.create(docker_image_id='i1',
|
||||
repository=repository, storage=image_storage,
|
||||
ancestors='/')
|
||||
|
||||
second_image = Image.create(docker_image_id='i2',
|
||||
repository=repository, storage=image_storage,
|
||||
ancestors='/')
|
||||
|
||||
model.tag.store_tag_manifest(repository.namespace_user.username, repository.name,
|
||||
'first', first_image.docker_image_id,
|
||||
'sha:someshahere', '{}')
|
||||
|
||||
model.tag.store_tag_manifest(repository.namespace_user.username, repository.name,
|
||||
'second', second_image.docker_image_id,
|
||||
'sha:someshahere', '{}')
|
||||
|
||||
# Delete the first tag.
|
||||
delete_tag(repository, 'first')
|
||||
assert_deleted(repository, 'i1')
|
||||
assert_not_deleted(repository, 'i2')
|
||||
|
||||
|
||||
def test_image_with_cas(default_tag_policy, initialized_db):
|
||||
""" A repository with a tag pointing to an image backed by CAS. Deleting and GCing the tag
|
||||
should result in the storage and its CAS data being removed.
|
||||
"""
|
||||
with assert_gc_integrity(expect_storage_removed=True):
|
||||
repository = create_repository()
|
||||
|
||||
# Create an image storage record under CAS.
|
||||
content = 'hello world'
|
||||
digest = 'sha256:' + hashlib.sha256(content).hexdigest()
|
||||
preferred = storage.preferred_locations[0]
|
||||
storage.put_content({preferred}, storage.blob_path(digest), content)
|
||||
|
||||
image_storage = database.ImageStorage.create(content_checksum=digest, uploading=False)
|
||||
location = database.ImageStorageLocation.get(name=preferred)
|
||||
database.ImageStoragePlacement.create(location=location, storage=image_storage)
|
||||
|
||||
# Ensure the CAS path exists.
|
||||
assert storage.exists({preferred}, storage.blob_path(digest))
|
||||
|
||||
# Create the image and the tag.
|
||||
first_image = Image.create(docker_image_id='i1',
|
||||
repository=repository, storage=image_storage,
|
||||
ancestors='/')
|
||||
|
||||
model.tag.store_tag_manifest(repository.namespace_user.username, repository.name,
|
||||
'first', first_image.docker_image_id,
|
||||
'sha:someshahere1', '{}')
|
||||
|
||||
assert_not_deleted(repository, 'i1')
|
||||
|
||||
# Delete the tag.
|
||||
delete_tag(repository, 'first')
|
||||
assert_deleted(repository, 'i1')
|
||||
|
||||
# Ensure the CAS path is gone.
|
||||
assert not storage.exists({preferred}, storage.blob_path(digest))
|
||||
|
||||
|
||||
def test_images_shared_cas(default_tag_policy, initialized_db):
|
||||
""" A repository, each two tags, pointing to the same image, which has image storage
|
||||
with the same *CAS path*, but *distinct records*. Deleting the first tag should delete the
|
||||
first image, and its storage, but not the file in storage, as it shares its CAS path.
|
||||
"""
|
||||
with assert_gc_integrity(expect_storage_removed=True):
|
||||
repository = create_repository()
|
||||
|
||||
# Create two image storage records with the same content checksum.
|
||||
content = 'hello world'
|
||||
digest = 'sha256:' + hashlib.sha256(content).hexdigest()
|
||||
preferred = storage.preferred_locations[0]
|
||||
storage.put_content({preferred}, storage.blob_path(digest), content)
|
||||
|
||||
is1 = database.ImageStorage.create(content_checksum=digest, uploading=False)
|
||||
is2 = database.ImageStorage.create(content_checksum=digest, uploading=False)
|
||||
|
||||
location = database.ImageStorageLocation.get(name=preferred)
|
||||
|
||||
database.ImageStoragePlacement.create(location=location, storage=is1)
|
||||
database.ImageStoragePlacement.create(location=location, storage=is2)
|
||||
|
||||
# Ensure the CAS path exists.
|
||||
assert storage.exists({preferred}, storage.blob_path(digest))
|
||||
|
||||
# Create two images in the repository, and two tags, each pointing to one of the storages.
|
||||
first_image = Image.create(docker_image_id='i1',
|
||||
repository=repository, storage=is1,
|
||||
ancestors='/')
|
||||
|
||||
second_image = Image.create(docker_image_id='i2',
|
||||
repository=repository, storage=is2,
|
||||
ancestors='/')
|
||||
|
||||
model.tag.store_tag_manifest(repository.namespace_user.username, repository.name,
|
||||
'first', first_image.docker_image_id,
|
||||
'sha:someshahere1', '{}')
|
||||
|
||||
model.tag.store_tag_manifest(repository.namespace_user.username, repository.name,
|
||||
'second', second_image.docker_image_id,
|
||||
'sha:someshahere2', '{}')
|
||||
|
||||
assert_not_deleted(repository, 'i1', 'i2')
|
||||
|
||||
# Delete the first tag.
|
||||
delete_tag(repository, 'first')
|
||||
assert_deleted(repository, 'i1')
|
||||
assert_not_deleted(repository, 'i2')
|
||||
|
||||
# Ensure the CAS path still exists.
|
||||
assert storage.exists({preferred}, storage.blob_path(digest))
|
||||
|
||||
|
||||
def test_images_shared_cas_with_new_blob_table(default_tag_policy, initialized_db):
|
||||
""" A repository with a tag and image that shares its CAS path with a record in the new Blob
|
||||
table. Deleting the first tag should delete the first image, and its storage, but not the
|
||||
file in storage, as it shares its CAS path with the blob row.
|
||||
"""
|
||||
with assert_gc_integrity(expect_storage_removed=True):
|
||||
repository = create_repository()
|
||||
|
||||
# Create two image storage records with the same content checksum.
|
||||
content = 'hello world'
|
||||
digest = 'sha256:' + hashlib.sha256(content).hexdigest()
|
||||
preferred = storage.preferred_locations[0]
|
||||
storage.put_content({preferred}, storage.blob_path(digest), content)
|
||||
|
||||
media_type = database.MediaType.get(name='text/plain')
|
||||
|
||||
is1 = database.ImageStorage.create(content_checksum=digest, uploading=False)
|
||||
database.Blob.create(digest=digest, size=0, media_type=media_type)
|
||||
|
||||
location = database.ImageStorageLocation.get(name=preferred)
|
||||
database.ImageStoragePlacement.create(location=location, storage=is1)
|
||||
|
||||
# Ensure the CAS path exists.
|
||||
assert storage.exists({preferred}, storage.blob_path(digest))
|
||||
|
||||
# Create the image in the repository, and the tag.
|
||||
first_image = Image.create(docker_image_id='i1',
|
||||
repository=repository, storage=is1,
|
||||
ancestors='/')
|
||||
|
||||
model.tag.store_tag_manifest(repository.namespace_user.username, repository.name,
|
||||
'first', first_image.docker_image_id,
|
||||
'sha:someshahere1', '{}')
|
||||
|
||||
assert_not_deleted(repository, 'i1')
|
||||
|
||||
# Delete the tag.
|
||||
delete_tag(repository, 'first')
|
||||
assert_deleted(repository, 'i1')
|
||||
|
||||
# Ensure the CAS path still exists, as it is referenced by the Blob table
|
||||
assert storage.exists({preferred}, storage.blob_path(digest))
|
|
@ -8,29 +8,29 @@ from endpoints.api.test.shared import conduct_api_call
|
|||
from endpoints.test.shared import client_with_identity
|
||||
from test.fixtures import *
|
||||
|
||||
@pytest.mark.parametrize('query, expected_query_count', [
|
||||
('simple', 7),
|
||||
('public', 6),
|
||||
('repository', 6),
|
||||
@pytest.mark.parametrize('query', [
|
||||
('simple'),
|
||||
('public'),
|
||||
('repository'),
|
||||
])
|
||||
def test_repository_search(query, expected_query_count, client):
|
||||
def test_repository_search(query, client):
|
||||
with client_with_identity('devtable', client) as cl:
|
||||
params = {'query': query}
|
||||
with assert_query_count(expected_query_count):
|
||||
with assert_query_count(6):
|
||||
result = conduct_api_call(cl, ConductRepositorySearch, 'GET', params, None, 200).json
|
||||
assert result['start_index'] == 0
|
||||
assert result['page'] == 1
|
||||
assert len(result['results'])
|
||||
|
||||
|
||||
@pytest.mark.parametrize('query, expected_query_count', [
|
||||
('simple', 8),
|
||||
('public', 8),
|
||||
('repository', 8),
|
||||
@pytest.mark.parametrize('query', [
|
||||
('simple'),
|
||||
('public'),
|
||||
('repository'),
|
||||
])
|
||||
def test_search_query_count(query, expected_query_count, client):
|
||||
def test_search_query_count(query, client):
|
||||
with client_with_identity('devtable', client) as cl:
|
||||
params = {'query': query}
|
||||
with assert_query_count(expected_query_count):
|
||||
with assert_query_count(8):
|
||||
result = conduct_api_call(cl, ConductSearch, 'GET', params, None, 200).json
|
||||
assert len(result['results'])
|
||||
|
|
|
@ -126,6 +126,11 @@ def initialized_db(appconfig):
|
|||
# Configure the database.
|
||||
configure(appconfig)
|
||||
|
||||
# Initialize caches.
|
||||
model._basequery._lookup_team_roles()
|
||||
model._basequery.get_public_repo_visibility()
|
||||
model.log.get_log_entry_kinds()
|
||||
|
||||
# If under a test *real* database, setup a savepoint.
|
||||
under_test_real_database = bool(os.environ.get('TEST_DATABASE_URI'))
|
||||
if under_test_real_database:
|
||||
|
|
585
test/test_gc.py
585
test/test_gc.py
|
@ -1,585 +0,0 @@
|
|||
import unittest
|
||||
import time
|
||||
import hashlib
|
||||
|
||||
from contextlib import contextmanager
|
||||
from playhouse.test_utils import assert_query_count
|
||||
|
||||
from app import app, storage
|
||||
from initdb import setup_database_for_testing, finished_database_for_testing
|
||||
from data import model, database
|
||||
from data.database import Image, ImageStorage, DerivedStorageForImage, Label, TagManifestLabel, Blob
|
||||
|
||||
|
||||
ADMIN_ACCESS_USER = 'devtable'
|
||||
PUBLIC_USER = 'public'
|
||||
|
||||
REPO = 'somerepo'
|
||||
|
||||
|
||||
class TestGarbageCollection(unittest.TestCase):
|
||||
@staticmethod
|
||||
def _set_tag_expiration_policy(namespace, expiration_s):
|
||||
namespace_user = model.user.get_user(namespace)
|
||||
model.user.change_user_tag_expiration(namespace_user, expiration_s)
|
||||
|
||||
def setUp(self):
|
||||
setup_database_for_testing(self)
|
||||
|
||||
self._set_tag_expiration_policy(ADMIN_ACCESS_USER, 0)
|
||||
self._set_tag_expiration_policy(PUBLIC_USER, 0)
|
||||
|
||||
self.app = app.test_client()
|
||||
self.ctx = app.test_request_context()
|
||||
self.ctx.__enter__()
|
||||
|
||||
def tearDown(self):
|
||||
finished_database_for_testing(self)
|
||||
self.ctx.__exit__(True, None, None)
|
||||
|
||||
@staticmethod
|
||||
def createImage(docker_image_id, repository_obj, username):
|
||||
preferred = storage.preferred_locations[0]
|
||||
image = model.image.find_create_or_link_image(docker_image_id, repository_obj, username, {},
|
||||
preferred)
|
||||
image.storage.uploading = False
|
||||
image.storage.save()
|
||||
|
||||
# Create derived images as well.
|
||||
model.image.find_or_create_derived_storage(image, 'squash', preferred)
|
||||
model.image.find_or_create_derived_storage(image, 'aci', preferred)
|
||||
|
||||
# Add some torrent info.
|
||||
try:
|
||||
database.TorrentInfo.get(storage=image.storage)
|
||||
except database.TorrentInfo.DoesNotExist:
|
||||
model.storage.save_torrent_info(image.storage, 1, 'helloworld')
|
||||
|
||||
# Add some additional placements to the image.
|
||||
for location_name in ['local_eu']:
|
||||
location = database.ImageStorageLocation.get(name=location_name)
|
||||
|
||||
try:
|
||||
database.ImageStoragePlacement.get(location=location, storage=image.storage)
|
||||
except:
|
||||
continue
|
||||
|
||||
database.ImageStoragePlacement.create(location=location, storage=image.storage)
|
||||
|
||||
return image.storage
|
||||
|
||||
def createRepository(self, namespace=ADMIN_ACCESS_USER, name=REPO, **kwargs):
|
||||
user = model.user.get_user(namespace)
|
||||
repo = model.repository.create_repository(namespace, name, user)
|
||||
|
||||
# Populate the repository with the tags.
|
||||
image_map = {}
|
||||
for tag_name in kwargs:
|
||||
image_ids = kwargs[tag_name]
|
||||
parent = None
|
||||
|
||||
for image_id in image_ids:
|
||||
if not image_id in image_map:
|
||||
image_map[image_id] = self.createImage(image_id, repo, namespace)
|
||||
|
||||
v1_metadata = {
|
||||
'id': image_id,
|
||||
}
|
||||
if parent is not None:
|
||||
v1_metadata['parent'] = parent.docker_image_id
|
||||
|
||||
# Set the ancestors for the image.
|
||||
parent = model.image.set_image_metadata(image_id, namespace, name, '', '', '', v1_metadata,
|
||||
parent=parent)
|
||||
|
||||
# Set the tag for the image.
|
||||
tag_manifest, _ = model.tag.store_tag_manifest(namespace, name, tag_name, image_ids[-1],
|
||||
'sha:someshahere', '{}')
|
||||
|
||||
# Add some labels to the tag.
|
||||
model.label.create_manifest_label(tag_manifest, 'foo', 'bar', 'manifest')
|
||||
model.label.create_manifest_label(tag_manifest, 'meh', 'grah', 'manifest')
|
||||
|
||||
return repo
|
||||
|
||||
def gcNow(self, repository):
|
||||
self.assertTrue(model.repository.garbage_collect_repo(repository))
|
||||
|
||||
def deleteTag(self, repository, tag, perform_gc=True):
|
||||
model.tag.delete_tag(repository.namespace_user.username, repository.name, tag)
|
||||
if perform_gc:
|
||||
self.assertTrue(model.repository.garbage_collect_repo(repository))
|
||||
|
||||
def moveTag(self, repository, tag, docker_image_id):
|
||||
model.tag.create_or_update_tag(repository.namespace_user.username, repository.name, tag,
|
||||
docker_image_id)
|
||||
self.assertTrue(model.repository.garbage_collect_repo(repository))
|
||||
|
||||
def assertNotDeleted(self, repository, *args):
|
||||
for docker_image_id in args:
|
||||
self.assertTrue(bool(model.image.get_image_by_id(repository.namespace_user.username,
|
||||
repository.name, docker_image_id)))
|
||||
|
||||
def assertDeleted(self, repository, *args):
|
||||
for docker_image_id in args:
|
||||
try:
|
||||
# Verify the image is missing when accessed by the repository.
|
||||
model.image.get_image_by_id(repository.namespace_user.username, repository.name,
|
||||
docker_image_id)
|
||||
except model.DataModelException:
|
||||
return
|
||||
|
||||
self.fail('Expected image %s to be deleted' % docker_image_id)
|
||||
|
||||
@staticmethod
|
||||
def _get_dangling_storage_count():
|
||||
storage_ids = set([current.id for current in ImageStorage.select()])
|
||||
referenced_by_image = set([image.storage_id for image in Image.select()])
|
||||
referenced_by_derived = set([derived.derivative_id
|
||||
for derived in DerivedStorageForImage.select()])
|
||||
|
||||
return len(storage_ids - referenced_by_image - referenced_by_derived)
|
||||
|
||||
@staticmethod
|
||||
def _get_dangling_label_count():
|
||||
label_ids = set([current.id for current in Label.select()])
|
||||
referenced_by_manifest = set([mlabel.label_id for mlabel in TagManifestLabel.select()])
|
||||
return len(label_ids - referenced_by_manifest)
|
||||
|
||||
@contextmanager
|
||||
def assert_gc_integrity(self, expect_storage_removed=True):
|
||||
""" Specialized assertion for ensuring that GC cleans up all dangling storages
|
||||
and labels, invokes the callback for images removed and doesn't invoke the
|
||||
callback for images *not* removed.
|
||||
"""
|
||||
# TODO: Consider also asserting the number of DB queries being performed.
|
||||
|
||||
# Add a callback for when images are removed.
|
||||
removed_image_storages = []
|
||||
model.config.register_image_cleanup_callback(removed_image_storages.extend)
|
||||
|
||||
# Store the number of dangling storages and labels.
|
||||
existing_storage_count = self._get_dangling_storage_count()
|
||||
existing_label_count = self._get_dangling_label_count()
|
||||
yield
|
||||
|
||||
# Ensure the number of dangling storages and labels has not changed.
|
||||
updated_storage_count = self._get_dangling_storage_count()
|
||||
self.assertEqual(updated_storage_count, existing_storage_count)
|
||||
|
||||
updated_label_count = self._get_dangling_label_count()
|
||||
self.assertEqual(updated_label_count, existing_label_count)
|
||||
|
||||
# Ensure that for each call to the image+storage cleanup callback, the image and its
|
||||
# storage is not found *anywhere* in the database.
|
||||
for removed_image_and_storage in removed_image_storages:
|
||||
with self.assertRaises(Image.DoesNotExist):
|
||||
Image.get(id=removed_image_and_storage.id)
|
||||
|
||||
with self.assertRaises(ImageStorage.DoesNotExist):
|
||||
ImageStorage.get(id=removed_image_and_storage.storage_id)
|
||||
|
||||
with self.assertRaises(ImageStorage.DoesNotExist):
|
||||
ImageStorage.get(uuid=removed_image_and_storage.storage.uuid)
|
||||
|
||||
self.assertEquals(expect_storage_removed, bool(removed_image_storages))
|
||||
|
||||
# Ensure all CAS storage is in the storage engine.
|
||||
preferred = storage.preferred_locations[0]
|
||||
for storage_row in ImageStorage.select():
|
||||
if storage_row.cas_path:
|
||||
storage.get_content({preferred}, storage.blob_path(storage_row.content_checksum))
|
||||
|
||||
for blob_row in Blob.select():
|
||||
storage.get_content({preferred}, storage.blob_path(blob_row.digest))
|
||||
|
||||
def test_has_garbage(self):
|
||||
""" Remove all existing repositories, then add one without garbage, check, then add one with
|
||||
garbage, and check again.
|
||||
"""
|
||||
# Delete all existing repos.
|
||||
for repo in database.Repository.select().order_by(database.Repository.id):
|
||||
self.assertTrue(model.repository.purge_repository(repo.namespace_user.username, repo.name))
|
||||
|
||||
# Change the time machine expiration on the namespace.
|
||||
(database.User
|
||||
.update(removed_tag_expiration_s=1000000000)
|
||||
.where(database.User.username == ADMIN_ACCESS_USER)
|
||||
.execute())
|
||||
|
||||
# Create a repository without any garbage.
|
||||
repository = self.createRepository(latest=['i1', 'i2', 'i3'])
|
||||
|
||||
# Ensure that no repositories are returned by the has garbage check.
|
||||
self.assertIsNone(model.repository.find_repository_with_garbage(1000000000))
|
||||
|
||||
# Delete a tag.
|
||||
self.deleteTag(repository, 'latest', perform_gc=False)
|
||||
|
||||
# There should still not be any repositories with garbage, due to time machine.
|
||||
self.assertIsNone(model.repository.find_repository_with_garbage(1000000000))
|
||||
|
||||
# Change the time machine expiration on the namespace.
|
||||
(database.User
|
||||
.update(removed_tag_expiration_s=0)
|
||||
.where(database.User.username == ADMIN_ACCESS_USER)
|
||||
.execute())
|
||||
|
||||
# Now we should find the repository for GC.
|
||||
repository = model.repository.find_repository_with_garbage(0)
|
||||
self.assertIsNotNone(repository)
|
||||
self.assertEquals(REPO, repository.name)
|
||||
|
||||
# GC the repository.
|
||||
self.assertTrue(model.repository.garbage_collect_repo(repository))
|
||||
|
||||
# There should now be no repositories with garbage.
|
||||
self.assertIsNone(model.repository.find_repository_with_garbage(0))
|
||||
|
||||
def test_find_garbage_policy_functions(self):
|
||||
with assert_query_count(1):
|
||||
one_policy = model.repository.get_random_gc_policy()
|
||||
all_policies = model.repository._get_gc_expiration_policies()
|
||||
self.assertIn(one_policy, all_policies)
|
||||
|
||||
def test_one_tag(self):
|
||||
""" Create a repository with a single tag, then remove that tag and verify that the repository
|
||||
is now empty. """
|
||||
with self.assert_gc_integrity():
|
||||
repository = self.createRepository(latest=['i1', 'i2', 'i3'])
|
||||
self.deleteTag(repository, 'latest')
|
||||
self.assertDeleted(repository, 'i1', 'i2', 'i3')
|
||||
|
||||
def test_two_tags_unshared_images(self):
|
||||
""" Repository has two tags with no shared images between them. """
|
||||
with self.assert_gc_integrity():
|
||||
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['f1', 'f2'])
|
||||
self.deleteTag(repository, 'latest')
|
||||
self.assertDeleted(repository, 'i1', 'i2', 'i3')
|
||||
self.assertNotDeleted(repository, 'f1', 'f2')
|
||||
|
||||
def test_two_tags_shared_images(self):
|
||||
""" Repository has two tags with shared images. Deleting the tag should only remove the
|
||||
unshared images.
|
||||
"""
|
||||
with self.assert_gc_integrity():
|
||||
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1'])
|
||||
self.deleteTag(repository, 'latest')
|
||||
self.assertDeleted(repository, 'i2', 'i3')
|
||||
self.assertNotDeleted(repository, 'i1', 'f1')
|
||||
|
||||
def test_unrelated_repositories(self):
|
||||
""" Two repositories with different images. Removing the tag from one leaves the other's
|
||||
images intact.
|
||||
"""
|
||||
with self.assert_gc_integrity():
|
||||
repository1 = self.createRepository(latest=['i1', 'i2', 'i3'], name='repo1')
|
||||
repository2 = self.createRepository(latest=['j1', 'j2', 'j3'], name='repo2')
|
||||
|
||||
self.deleteTag(repository1, 'latest')
|
||||
|
||||
self.assertDeleted(repository1, 'i1', 'i2', 'i3')
|
||||
self.assertNotDeleted(repository2, 'j1', 'j2', 'j3')
|
||||
|
||||
def test_related_repositories(self):
|
||||
""" Two repositories with shared images. Removing the tag from one leaves the other's
|
||||
images intact.
|
||||
"""
|
||||
with self.assert_gc_integrity():
|
||||
repository1 = self.createRepository(latest=['i1', 'i2', 'i3'], name='repo1')
|
||||
repository2 = self.createRepository(latest=['i1', 'i2', 'j1'], name='repo2')
|
||||
|
||||
self.deleteTag(repository1, 'latest')
|
||||
|
||||
self.assertDeleted(repository1, 'i3')
|
||||
self.assertNotDeleted(repository2, 'i1', 'i2', 'j1')
|
||||
|
||||
def test_inaccessible_repositories(self):
|
||||
""" Two repositories under different namespaces should result in the images being deleted
|
||||
but not completely removed from the database.
|
||||
"""
|
||||
with self.assert_gc_integrity():
|
||||
repository1 = self.createRepository(namespace=ADMIN_ACCESS_USER, latest=['i1', 'i2', 'i3'])
|
||||
repository2 = self.createRepository(namespace=PUBLIC_USER, latest=['i1', 'i2', 'i3'])
|
||||
|
||||
self.deleteTag(repository1, 'latest')
|
||||
self.assertDeleted(repository1, 'i1', 'i2', 'i3')
|
||||
self.assertNotDeleted(repository2, 'i1', 'i2', 'i3')
|
||||
|
||||
def test_multiple_shared_images(self):
|
||||
""" Repository has multiple tags with shared images. Selectively deleting the tags, and
|
||||
verifying at each step.
|
||||
"""
|
||||
with self.assert_gc_integrity():
|
||||
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'],
|
||||
third=['t1', 't2', 't3'], fourth=['i1', 'f1'])
|
||||
|
||||
# Current state:
|
||||
# latest -> i3->i2->i1
|
||||
# other -> f2->f1->i1
|
||||
# third -> t3->t2->t1
|
||||
# fourth -> f1->i1
|
||||
|
||||
# Delete tag other. Should delete f2, since it is not shared.
|
||||
self.deleteTag(repository, 'other')
|
||||
self.assertDeleted(repository, 'f2')
|
||||
self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3', 'f1')
|
||||
|
||||
# Current state:
|
||||
# latest -> i3->i2->i1
|
||||
# third -> t3->t2->t1
|
||||
# fourth -> f1->i1
|
||||
|
||||
# Move tag fourth to i3. This should remove f1 since it is no longer referenced.
|
||||
self.moveTag(repository, 'fourth', 'i3')
|
||||
self.assertDeleted(repository, 'f1')
|
||||
self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3')
|
||||
|
||||
# Current state:
|
||||
# latest -> i3->i2->i1
|
||||
# third -> t3->t2->t1
|
||||
# fourth -> i3->i2->i1
|
||||
|
||||
# Delete tag 'latest'. This should do nothing since fourth is on the same branch.
|
||||
self.deleteTag(repository, 'latest')
|
||||
self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3')
|
||||
|
||||
# Current state:
|
||||
# third -> t3->t2->t1
|
||||
# fourth -> i3->i2->i1
|
||||
|
||||
# Delete tag 'third'. This should remove t1->t3.
|
||||
self.deleteTag(repository, 'third')
|
||||
self.assertDeleted(repository, 't1', 't2', 't3')
|
||||
self.assertNotDeleted(repository, 'i1', 'i2', 'i3')
|
||||
|
||||
# Current state:
|
||||
# fourth -> i3->i2->i1
|
||||
|
||||
# Add tag to i1.
|
||||
self.moveTag(repository, 'newtag', 'i1')
|
||||
self.assertNotDeleted(repository, 'i1', 'i2', 'i3')
|
||||
|
||||
# Current state:
|
||||
# fourth -> i3->i2->i1
|
||||
# newtag -> i1
|
||||
|
||||
# Delete tag 'fourth'. This should remove i2 and i3.
|
||||
self.deleteTag(repository, 'fourth')
|
||||
self.assertDeleted(repository, 'i2', 'i3')
|
||||
self.assertNotDeleted(repository, 'i1')
|
||||
|
||||
# Current state:
|
||||
# newtag -> i1
|
||||
|
||||
# Delete tag 'newtag'. This should remove the remaining image.
|
||||
self.deleteTag(repository, 'newtag')
|
||||
self.assertDeleted(repository, 'i1')
|
||||
|
||||
# Current state:
|
||||
# (Empty)
|
||||
|
||||
def test_empty_gc(self):
|
||||
with self.assert_gc_integrity(expect_storage_removed=False):
|
||||
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'],
|
||||
third=['t1', 't2', 't3'], fourth=['i1', 'f1'])
|
||||
|
||||
self.gcNow(repository)
|
||||
self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3', 'f1', 'f2')
|
||||
|
||||
def test_time_machine_no_gc(self):
|
||||
""" Repository has two tags with shared images. Deleting the tag should not remove any images
|
||||
"""
|
||||
with self.assert_gc_integrity(expect_storage_removed=False):
|
||||
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1'])
|
||||
self._set_tag_expiration_policy(repository.namespace_user.username, 60*60*24)
|
||||
|
||||
self.deleteTag(repository, 'latest')
|
||||
self.assertNotDeleted(repository, 'i2', 'i3')
|
||||
self.assertNotDeleted(repository, 'i1', 'f1')
|
||||
|
||||
def test_time_machine_gc(self):
|
||||
""" Repository has two tags with shared images. Deleting the second tag should cause the images
|
||||
for the first deleted tag to gc.
|
||||
"""
|
||||
with self.assert_gc_integrity():
|
||||
repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1'])
|
||||
|
||||
self._set_tag_expiration_policy(repository.namespace_user.username, 1)
|
||||
|
||||
self.deleteTag(repository, 'latest')
|
||||
self.assertNotDeleted(repository, 'i2', 'i3')
|
||||
self.assertNotDeleted(repository, 'i1', 'f1')
|
||||
|
||||
time.sleep(2)
|
||||
|
||||
self.deleteTag(repository, 'other') # This will cause the images associated with latest to gc
|
||||
self.assertDeleted(repository, 'i2', 'i3')
|
||||
self.assertNotDeleted(repository, 'i1', 'f1')
|
||||
|
||||
def test_images_shared_storage(self):
|
||||
""" Repository with two tags, both with the same shared storage. Deleting the first
|
||||
tag should delete the first image, but *not* its storage.
|
||||
"""
|
||||
with self.assert_gc_integrity(expect_storage_removed=False):
|
||||
repository = self.createRepository()
|
||||
|
||||
# Add two tags, each with their own image, but with the same storage.
|
||||
image_storage = model.storage.create_v1_storage(storage.preferred_locations[0])
|
||||
|
||||
first_image = Image.create(docker_image_id='i1',
|
||||
repository=repository, storage=image_storage,
|
||||
ancestors='/')
|
||||
|
||||
second_image = Image.create(docker_image_id='i2',
|
||||
repository=repository, storage=image_storage,
|
||||
ancestors='/')
|
||||
|
||||
model.tag.store_tag_manifest(repository.namespace_user.username, repository.name,
|
||||
'first', first_image.docker_image_id,
|
||||
'sha:someshahere', '{}')
|
||||
|
||||
model.tag.store_tag_manifest(repository.namespace_user.username, repository.name,
|
||||
'second', second_image.docker_image_id,
|
||||
'sha:someshahere', '{}')
|
||||
|
||||
# Delete the first tag.
|
||||
self.deleteTag(repository, 'first')
|
||||
self.assertDeleted(repository, 'i1')
|
||||
self.assertNotDeleted(repository, 'i2')
|
||||
|
||||
def test_image_with_cas(self):
|
||||
""" A repository with a tag pointing to an image backed by CAS. Deleting and GCing the tag
|
||||
should result in the storage and its CAS data being removed.
|
||||
"""
|
||||
with self.assert_gc_integrity(expect_storage_removed=True):
|
||||
repository = self.createRepository()
|
||||
|
||||
# Create an image storage record under CAS.
|
||||
content = 'hello world'
|
||||
digest = 'sha256:' + hashlib.sha256(content).hexdigest()
|
||||
preferred = storage.preferred_locations[0]
|
||||
storage.put_content({preferred}, storage.blob_path(digest), content)
|
||||
|
||||
image_storage = database.ImageStorage.create(content_checksum=digest, uploading=False)
|
||||
location = database.ImageStorageLocation.get(name=preferred)
|
||||
database.ImageStoragePlacement.create(location=location, storage=image_storage)
|
||||
|
||||
# Ensure the CAS path exists.
|
||||
self.assertTrue(storage.exists({preferred}, storage.blob_path(digest)))
|
||||
|
||||
# Create the image and the tag.
|
||||
first_image = Image.create(docker_image_id='i1',
|
||||
repository=repository, storage=image_storage,
|
||||
ancestors='/')
|
||||
|
||||
model.tag.store_tag_manifest(repository.namespace_user.username, repository.name,
|
||||
'first', first_image.docker_image_id,
|
||||
'sha:someshahere1', '{}')
|
||||
|
||||
self.assertNotDeleted(repository, 'i1')
|
||||
|
||||
# Delete the tag.
|
||||
self.deleteTag(repository, 'first')
|
||||
self.assertDeleted(repository, 'i1')
|
||||
|
||||
# Ensure the CAS path is gone.
|
||||
self.assertFalse(storage.exists({preferred}, storage.blob_path(digest)))
|
||||
|
||||
def test_images_shared_cas(self):
|
||||
""" A repository, each two tags, pointing to the same image, which has image storage
|
||||
with the same *CAS path*, but *distinct records*. Deleting the first tag should delete the
|
||||
first image, and its storage, but not the file in storage, as it shares its CAS path.
|
||||
"""
|
||||
with self.assert_gc_integrity(expect_storage_removed=True):
|
||||
repository = self.createRepository()
|
||||
|
||||
# Create two image storage records with the same content checksum.
|
||||
content = 'hello world'
|
||||
digest = 'sha256:' + hashlib.sha256(content).hexdigest()
|
||||
preferred = storage.preferred_locations[0]
|
||||
storage.put_content({preferred}, storage.blob_path(digest), content)
|
||||
|
||||
is1 = database.ImageStorage.create(content_checksum=digest, uploading=False)
|
||||
is2 = database.ImageStorage.create(content_checksum=digest, uploading=False)
|
||||
|
||||
location = database.ImageStorageLocation.get(name=preferred)
|
||||
|
||||
database.ImageStoragePlacement.create(location=location, storage=is1)
|
||||
database.ImageStoragePlacement.create(location=location, storage=is2)
|
||||
|
||||
# Ensure the CAS path exists.
|
||||
self.assertTrue(storage.exists({preferred}, storage.blob_path(digest)))
|
||||
|
||||
# Create two images in the repository, and two tags, each pointing to one of the storages.
|
||||
first_image = Image.create(docker_image_id='i1',
|
||||
repository=repository, storage=is1,
|
||||
ancestors='/')
|
||||
|
||||
second_image = Image.create(docker_image_id='i2',
|
||||
repository=repository, storage=is2,
|
||||
ancestors='/')
|
||||
|
||||
model.tag.store_tag_manifest(repository.namespace_user.username, repository.name,
|
||||
'first', first_image.docker_image_id,
|
||||
'sha:someshahere1', '{}')
|
||||
|
||||
model.tag.store_tag_manifest(repository.namespace_user.username, repository.name,
|
||||
'second', second_image.docker_image_id,
|
||||
'sha:someshahere2', '{}')
|
||||
|
||||
self.assertNotDeleted(repository, 'i1', 'i2')
|
||||
|
||||
# Delete the first tag.
|
||||
self.deleteTag(repository, 'first')
|
||||
self.assertDeleted(repository, 'i1')
|
||||
self.assertNotDeleted(repository, 'i2')
|
||||
|
||||
# Ensure the CAS path still exists.
|
||||
self.assertTrue(storage.exists({preferred}, storage.blob_path(digest)))
|
||||
|
||||
def test_images_shared_cas_with_new_blob_table(self):
|
||||
""" A repository with a tag and image that shares its CAS path with a record in the new Blob
|
||||
table. Deleting the first tag should delete the first image, and its storage, but not the
|
||||
file in storage, as it shares its CAS path with the blob row.
|
||||
"""
|
||||
with self.assert_gc_integrity(expect_storage_removed=True):
|
||||
repository = self.createRepository()
|
||||
|
||||
# Create two image storage records with the same content checksum.
|
||||
content = 'hello world'
|
||||
digest = 'sha256:' + hashlib.sha256(content).hexdigest()
|
||||
preferred = storage.preferred_locations[0]
|
||||
storage.put_content({preferred}, storage.blob_path(digest), content)
|
||||
|
||||
media_type = database.MediaType.get(name='text/plain')
|
||||
|
||||
is1 = database.ImageStorage.create(content_checksum=digest, uploading=False)
|
||||
is2 = database.Blob.create(digest=digest, size=0, media_type=media_type)
|
||||
|
||||
location = database.ImageStorageLocation.get(name=preferred)
|
||||
database.ImageStoragePlacement.create(location=location, storage=is1)
|
||||
|
||||
# Ensure the CAS path exists.
|
||||
self.assertTrue(storage.exists({preferred}, storage.blob_path(digest)))
|
||||
|
||||
# Create the image in the repository, and the tag.
|
||||
first_image = Image.create(docker_image_id='i1',
|
||||
repository=repository, storage=is1,
|
||||
ancestors='/')
|
||||
|
||||
model.tag.store_tag_manifest(repository.namespace_user.username, repository.name,
|
||||
'first', first_image.docker_image_id,
|
||||
'sha:someshahere1', '{}')
|
||||
|
||||
self.assertNotDeleted(repository, 'i1')
|
||||
|
||||
# Delete the tag.
|
||||
self.deleteTag(repository, 'first')
|
||||
self.assertDeleted(repository, 'i1')
|
||||
|
||||
# Ensure the CAS path still exists, as it is referenced by the Blob table.
|
||||
self.assertTrue(storage.exists({preferred}, storage.blob_path(digest)))
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
Reference in a new issue