Move GC tests into pytest
This commit is contained in:
		
							parent
							
								
									e45ffb39d1
								
							
						
					
					
						commit
						e9a95874ee
					
				
					 2 changed files with 615 additions and 585 deletions
				
			
		
							
								
								
									
										615
									
								
								data/model/test/test_gc.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										615
									
								
								data/model/test/test_gc.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,615 @@ | ||||||
|  | import hashlib | ||||||
|  | import pytest | ||||||
|  | import time | ||||||
|  | 
 | ||||||
|  | from mock import patch | ||||||
|  | 
 | ||||||
|  | from app import storage | ||||||
|  | from contextlib import contextmanager | ||||||
|  | from playhouse.test_utils import assert_query_count | ||||||
|  | 
 | ||||||
|  | from data import model, database | ||||||
|  | from data.database import Image, ImageStorage, DerivedStorageForImage, Label, TagManifestLabel, Blob | ||||||
|  | from test.fixtures import * | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | ADMIN_ACCESS_USER = 'devtable' | ||||||
|  | PUBLIC_USER = 'public' | ||||||
|  | 
 | ||||||
|  | REPO = 'somerepo' | ||||||
|  | 
 | ||||||
|  | def _set_tag_expiration_policy(namespace, expiration_s): | ||||||
|  |   namespace_user = model.user.get_user(namespace) | ||||||
|  |   model.user.change_user_tag_expiration(namespace_user, expiration_s) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | @pytest.fixture() | ||||||
|  | def default_tag_policy(initialized_db): | ||||||
|  |   _set_tag_expiration_policy(ADMIN_ACCESS_USER, 0) | ||||||
|  |   _set_tag_expiration_policy(PUBLIC_USER, 0) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def create_image(docker_image_id, repository_obj, username): | ||||||
|  |   preferred = storage.preferred_locations[0] | ||||||
|  |   image = model.image.find_create_or_link_image(docker_image_id, repository_obj, username, {}, | ||||||
|  |                                                 preferred) | ||||||
|  |   image.storage.uploading = False | ||||||
|  |   image.storage.save() | ||||||
|  | 
 | ||||||
|  |   # Create derived images as well. | ||||||
|  |   model.image.find_or_create_derived_storage(image, 'squash', preferred) | ||||||
|  |   model.image.find_or_create_derived_storage(image, 'aci', preferred) | ||||||
|  | 
 | ||||||
|  |   # Add some torrent info. | ||||||
|  |   try: | ||||||
|  |     database.TorrentInfo.get(storage=image.storage) | ||||||
|  |   except database.TorrentInfo.DoesNotExist: | ||||||
|  |     model.storage.save_torrent_info(image.storage, 1, 'helloworld') | ||||||
|  | 
 | ||||||
|  |   # Add some additional placements to the image. | ||||||
|  |   for location_name in ['local_eu']: | ||||||
|  |     location = database.ImageStorageLocation.get(name=location_name) | ||||||
|  | 
 | ||||||
|  |     try: | ||||||
|  |       database.ImageStoragePlacement.get(location=location, storage=image.storage) | ||||||
|  |     except: | ||||||
|  |       continue | ||||||
|  | 
 | ||||||
|  |     database.ImageStoragePlacement.create(location=location, storage=image.storage) | ||||||
|  | 
 | ||||||
|  |   return image.storage | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def create_repository(namespace=ADMIN_ACCESS_USER, name=REPO, **kwargs): | ||||||
|  |   user = model.user.get_user(namespace) | ||||||
|  |   repo = model.repository.create_repository(namespace, name, user) | ||||||
|  | 
 | ||||||
|  |   # Populate the repository with the tags. | ||||||
|  |   image_map = {} | ||||||
|  |   for tag_name in kwargs: | ||||||
|  |     image_ids = kwargs[tag_name] | ||||||
|  |     parent = None | ||||||
|  | 
 | ||||||
|  |     for image_id in image_ids: | ||||||
|  |       if not image_id in image_map: | ||||||
|  |         image_map[image_id] = create_image(image_id, repo, namespace) | ||||||
|  | 
 | ||||||
|  |       v1_metadata = { | ||||||
|  |         'id': image_id, | ||||||
|  |       } | ||||||
|  |       if parent is not None: | ||||||
|  |         v1_metadata['parent'] = parent.docker_image_id | ||||||
|  | 
 | ||||||
|  |       # Set the ancestors for the image. | ||||||
|  |       parent = model.image.set_image_metadata(image_id, namespace, name, '', '', '', v1_metadata, | ||||||
|  |                                               parent=parent) | ||||||
|  | 
 | ||||||
|  |     # Set the tag for the image. | ||||||
|  |     tag_manifest, _ = model.tag.store_tag_manifest(namespace, name, tag_name, image_ids[-1], | ||||||
|  |                                                    'sha:someshahere', '{}') | ||||||
|  | 
 | ||||||
|  |     # Add some labels to the tag. | ||||||
|  |     model.label.create_manifest_label(tag_manifest, 'foo', 'bar', 'manifest') | ||||||
|  |     model.label.create_manifest_label(tag_manifest, 'meh', 'grah', 'manifest') | ||||||
|  | 
 | ||||||
|  |   return repo | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def gc_now(repository): | ||||||
|  |   assert model.repository.garbage_collect_repo(repository) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def delete_tag(repository, tag, perform_gc=True): | ||||||
|  |   model.tag.delete_tag(repository.namespace_user.username, repository.name, tag) | ||||||
|  |   if perform_gc: | ||||||
|  |     assert model.repository.garbage_collect_repo(repository) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def move_tag(repository, tag, docker_image_id): | ||||||
|  |   model.tag.create_or_update_tag(repository.namespace_user.username, repository.name, tag, | ||||||
|  |                                  docker_image_id) | ||||||
|  |   assert model.repository.garbage_collect_repo(repository) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def assert_not_deleted(repository, *args): | ||||||
|  |   for docker_image_id in args: | ||||||
|  |     assert model.image.get_image_by_id(repository.namespace_user.username, repository.name, | ||||||
|  |                                        docker_image_id) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def assert_deleted(repository, *args): | ||||||
|  |   for docker_image_id in args: | ||||||
|  |     try: | ||||||
|  |       # Verify the image is missing when accessed by the repository. | ||||||
|  |       model.image.get_image_by_id(repository.namespace_user.username, repository.name, | ||||||
|  |                                   docker_image_id) | ||||||
|  |     except model.DataModelException: | ||||||
|  |       return | ||||||
|  | 
 | ||||||
|  |     assert False, 'Expected image %s to be deleted' % docker_image_id | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def _get_dangling_storage_count(): | ||||||
|  |   storage_ids = set([current.id for current in ImageStorage.select()]) | ||||||
|  |   referenced_by_image = set([image.storage_id for image in Image.select()]) | ||||||
|  |   referenced_by_derived = set([derived.derivative_id | ||||||
|  |                                for derived in DerivedStorageForImage.select()]) | ||||||
|  | 
 | ||||||
|  |   return len(storage_ids - referenced_by_image - referenced_by_derived) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def _get_dangling_label_count(): | ||||||
|  |   label_ids = set([current.id for current in Label.select()]) | ||||||
|  |   referenced_by_manifest = set([mlabel.label_id for mlabel in TagManifestLabel.select()]) | ||||||
|  |   return len(label_ids - referenced_by_manifest) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | @contextmanager | ||||||
|  | def assert_gc_integrity(expect_storage_removed=True): | ||||||
|  |   """ Specialized assertion for ensuring that GC cleans up all dangling storages | ||||||
|  |       and labels, invokes the callback for images removed and doesn't invoke the | ||||||
|  |       callback for images *not* removed. | ||||||
|  |   """ | ||||||
|  |   # Add a callback for when images are removed. | ||||||
|  |   removed_image_storages = [] | ||||||
|  |   model.config.register_image_cleanup_callback(removed_image_storages.extend) | ||||||
|  | 
 | ||||||
|  |   # Store the number of dangling storages and labels. | ||||||
|  |   existing_storage_count = _get_dangling_storage_count() | ||||||
|  |   existing_label_count = _get_dangling_label_count() | ||||||
|  |   yield | ||||||
|  | 
 | ||||||
|  |   # Ensure the number of dangling storages and labels has not changed. | ||||||
|  |   updated_storage_count = _get_dangling_storage_count() | ||||||
|  |   assert updated_storage_count == existing_storage_count | ||||||
|  | 
 | ||||||
|  |   updated_label_count = _get_dangling_label_count() | ||||||
|  |   assert updated_label_count == existing_label_count | ||||||
|  | 
 | ||||||
|  |   # Ensure that for each call to the image+storage cleanup callback, the image and its | ||||||
|  |   # storage is not found *anywhere* in the database. | ||||||
|  |   for removed_image_and_storage in removed_image_storages: | ||||||
|  |     with pytest.raises(Image.DoesNotExist): | ||||||
|  |       Image.get(id=removed_image_and_storage.id) | ||||||
|  | 
 | ||||||
|  |     with pytest.raises(ImageStorage.DoesNotExist): | ||||||
|  |       ImageStorage.get(id=removed_image_and_storage.storage_id) | ||||||
|  | 
 | ||||||
|  |     with pytest.raises(ImageStorage.DoesNotExist): | ||||||
|  |       ImageStorage.get(uuid=removed_image_and_storage.storage.uuid) | ||||||
|  | 
 | ||||||
|  |   assert expect_storage_removed == bool(removed_image_storages) | ||||||
|  | 
 | ||||||
|  |   # Ensure all CAS storage is in the storage engine. | ||||||
|  |   preferred = storage.preferred_locations[0] | ||||||
|  |   for storage_row in ImageStorage.select(): | ||||||
|  |     if storage_row.cas_path: | ||||||
|  |       storage.get_content({preferred}, storage.blob_path(storage_row.content_checksum)) | ||||||
|  | 
 | ||||||
|  |   for blob_row in Blob.select(): | ||||||
|  |       storage.get_content({preferred}, storage.blob_path(blob_row.digest)) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def test_has_garbage(default_tag_policy, initialized_db): | ||||||
|  |   """ Remove all existing repositories, then add one without garbage, check, then add one with | ||||||
|  |       garbage, and check again. | ||||||
|  |   """ | ||||||
|  |   # Delete all existing repos. | ||||||
|  |   for repo in database.Repository.select().order_by(database.Repository.id): | ||||||
|  |     assert model.repository.purge_repository(repo.namespace_user.username, repo.name) | ||||||
|  | 
 | ||||||
|  |   # Change the time machine expiration on the namespace. | ||||||
|  |   (database.User | ||||||
|  |    .update(removed_tag_expiration_s=1000000000) | ||||||
|  |    .where(database.User.username == ADMIN_ACCESS_USER) | ||||||
|  |    .execute()) | ||||||
|  | 
 | ||||||
|  |   # Create a repository without any garbage. | ||||||
|  |   repository = create_repository(latest=['i1', 'i2', 'i3']) | ||||||
|  | 
 | ||||||
|  |   # Ensure that no repositories are returned by the has garbage check. | ||||||
|  |   assert model.repository.find_repository_with_garbage(1000000000) is None | ||||||
|  | 
 | ||||||
|  |   # Delete a tag. | ||||||
|  |   delete_tag(repository, 'latest', perform_gc=False) | ||||||
|  | 
 | ||||||
|  |   # There should still not be any repositories with garbage, due to time machine. | ||||||
|  |   assert model.repository.find_repository_with_garbage(1000000000) is None | ||||||
|  | 
 | ||||||
|  |   # Change the time machine expiration on the namespace. | ||||||
|  |   (database.User | ||||||
|  |    .update(removed_tag_expiration_s=0) | ||||||
|  |    .where(database.User.username == ADMIN_ACCESS_USER) | ||||||
|  |    .execute()) | ||||||
|  | 
 | ||||||
|  |   # Now we should find the repository for GC. | ||||||
|  |   repository = model.repository.find_repository_with_garbage(0) | ||||||
|  |   assert repository is not None | ||||||
|  |   assert repository.name == REPO | ||||||
|  | 
 | ||||||
|  |   # GC the repository. | ||||||
|  |   assert model.repository.garbage_collect_repo(repository) | ||||||
|  | 
 | ||||||
|  |   # There should now be no repositories with garbage. | ||||||
|  |   assert model.repository.find_repository_with_garbage(0) is None | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def test_find_garbage_policy_functions(default_tag_policy, initialized_db): | ||||||
|  |   with assert_query_count(1): | ||||||
|  |     one_policy = model.repository.get_random_gc_policy() | ||||||
|  |     all_policies = model.repository._get_gc_expiration_policies() | ||||||
|  |     assert one_policy in all_policies | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def test_one_tag(default_tag_policy, initialized_db): | ||||||
|  |   """ Create a repository with a single tag, then remove that tag and verify that the repository | ||||||
|  |       is now empty. """ | ||||||
|  |   with assert_gc_integrity(): | ||||||
|  |     repository = create_repository(latest=['i1', 'i2', 'i3']) | ||||||
|  |     delete_tag(repository, 'latest') | ||||||
|  |     assert_deleted(repository, 'i1', 'i2', 'i3') | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def test_two_tags_unshared_images(default_tag_policy, initialized_db): | ||||||
|  |   """ Repository has two tags with no shared images between them. """ | ||||||
|  |   with assert_gc_integrity(): | ||||||
|  |     repository = create_repository(latest=['i1', 'i2', 'i3'], other=['f1', 'f2']) | ||||||
|  |     delete_tag(repository, 'latest') | ||||||
|  |     assert_deleted(repository, 'i1', 'i2', 'i3') | ||||||
|  |     assert_not_deleted(repository, 'f1', 'f2') | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def test_two_tags_shared_images(default_tag_policy, initialized_db): | ||||||
|  |   """ Repository has two tags with shared images. Deleting the tag should only remove the | ||||||
|  |       unshared images. | ||||||
|  |   """ | ||||||
|  |   with assert_gc_integrity(): | ||||||
|  |     repository = create_repository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1']) | ||||||
|  |     delete_tag(repository, 'latest') | ||||||
|  |     assert_deleted(repository, 'i2', 'i3') | ||||||
|  |     assert_not_deleted(repository, 'i1', 'f1') | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def test_unrelated_repositories(default_tag_policy, initialized_db): | ||||||
|  |   """ Two repositories with different images. Removing the tag from one leaves the other's | ||||||
|  |       images intact. | ||||||
|  |   """ | ||||||
|  |   with assert_gc_integrity(): | ||||||
|  |     repository1 = create_repository(latest=['i1', 'i2', 'i3'], name='repo1') | ||||||
|  |     repository2 = create_repository(latest=['j1', 'j2', 'j3'], name='repo2') | ||||||
|  | 
 | ||||||
|  |     delete_tag(repository1, 'latest') | ||||||
|  | 
 | ||||||
|  |     assert_deleted(repository1, 'i1', 'i2', 'i3') | ||||||
|  |     assert_not_deleted(repository2, 'j1', 'j2', 'j3') | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def test_related_repositories(default_tag_policy, initialized_db): | ||||||
|  |   """ Two repositories with shared images. Removing the tag from one leaves the other's | ||||||
|  |       images intact. | ||||||
|  |   """ | ||||||
|  |   with assert_gc_integrity(): | ||||||
|  |     repository1 = create_repository(latest=['i1', 'i2', 'i3'], name='repo1') | ||||||
|  |     repository2 = create_repository(latest=['i1', 'i2', 'j1'], name='repo2') | ||||||
|  | 
 | ||||||
|  |     delete_tag(repository1, 'latest') | ||||||
|  | 
 | ||||||
|  |     assert_deleted(repository1, 'i3') | ||||||
|  |     assert_not_deleted(repository2, 'i1', 'i2', 'j1') | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def test_inaccessible_repositories(default_tag_policy, initialized_db): | ||||||
|  |   """ Two repositories under different namespaces should result in the images being deleted | ||||||
|  |       but not completely removed from the database. | ||||||
|  |   """ | ||||||
|  |   with assert_gc_integrity(): | ||||||
|  |     repository1 = create_repository(namespace=ADMIN_ACCESS_USER, latest=['i1', 'i2', 'i3']) | ||||||
|  |     repository2 = create_repository(namespace=PUBLIC_USER, latest=['i1', 'i2', 'i3']) | ||||||
|  | 
 | ||||||
|  |     delete_tag(repository1, 'latest') | ||||||
|  |     assert_deleted(repository1, 'i1', 'i2', 'i3') | ||||||
|  |     assert_not_deleted(repository2, 'i1', 'i2', 'i3') | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def test_many_multiple_shared_images(default_tag_policy, initialized_db): | ||||||
|  |   """ Repository has multiple tags with shared images. Delete all but one tag. | ||||||
|  |   """ | ||||||
|  |   with assert_gc_integrity(): | ||||||
|  |     repository = create_repository(latest=['i1', 'i2', 'i3', 'i4', 'i5', 'i6', 'i7', 'i8', 'j0'], | ||||||
|  |                                    master=['i1', 'i2', 'i3', 'i4', 'i5', 'i6', 'i7', 'i8', 'j1']) | ||||||
|  | 
 | ||||||
|  |     # Delete tag latest. Should only delete j0, since it is not shared. | ||||||
|  |     delete_tag(repository, 'latest') | ||||||
|  | 
 | ||||||
|  |     assert_deleted(repository, 'j0') | ||||||
|  |     assert_not_deleted(repository, 'i1', 'i2', 'i3', 'i4', 'i5', 'i6', 'i7', 'i8', 'j1') | ||||||
|  | 
 | ||||||
|  |     # Delete tag master. Should delete the rest of the images. | ||||||
|  |     delete_tag(repository, 'master') | ||||||
|  | 
 | ||||||
|  |     assert_deleted(repository, 'i1', 'i2', 'i3', 'i4', 'i5', 'i6', 'i7', 'i8', 'j1') | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def test_multiple_shared_images(default_tag_policy, initialized_db): | ||||||
|  |   """ Repository has multiple tags with shared images. Selectively deleting the tags, and | ||||||
|  |       verifying at each step. | ||||||
|  |   """ | ||||||
|  |   with assert_gc_integrity(): | ||||||
|  |     repository = create_repository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'], | ||||||
|  |                                    third=['t1', 't2', 't3'], fourth=['i1', 'f1']) | ||||||
|  | 
 | ||||||
|  |     # Current state: | ||||||
|  |     # latest -> i3->i2->i1 | ||||||
|  |     # other -> f2->f1->i1 | ||||||
|  |     # third -> t3->t2->t1 | ||||||
|  |     # fourth -> f1->i1 | ||||||
|  | 
 | ||||||
|  |     # Delete tag other. Should delete f2, since it is not shared. | ||||||
|  |     delete_tag(repository, 'other') | ||||||
|  |     assert_deleted(repository, 'f2') | ||||||
|  |     assert_not_deleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3', 'f1') | ||||||
|  | 
 | ||||||
|  |     # Current state: | ||||||
|  |     # latest -> i3->i2->i1 | ||||||
|  |     # third -> t3->t2->t1 | ||||||
|  |     # fourth -> f1->i1 | ||||||
|  | 
 | ||||||
|  |     # Move tag fourth to i3. This should remove f1 since it is no longer referenced. | ||||||
|  |     move_tag(repository, 'fourth', 'i3') | ||||||
|  |     assert_deleted(repository, 'f1') | ||||||
|  |     assert_not_deleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3') | ||||||
|  | 
 | ||||||
|  |     # Current state: | ||||||
|  |     # latest -> i3->i2->i1 | ||||||
|  |     # third -> t3->t2->t1 | ||||||
|  |     # fourth -> i3->i2->i1 | ||||||
|  | 
 | ||||||
|  |     # Delete tag 'latest'. This should do nothing since fourth is on the same branch. | ||||||
|  |     delete_tag(repository, 'latest') | ||||||
|  |     assert_not_deleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3') | ||||||
|  | 
 | ||||||
|  |     # Current state: | ||||||
|  |     # third -> t3->t2->t1 | ||||||
|  |     # fourth -> i3->i2->i1 | ||||||
|  | 
 | ||||||
|  |     # Delete tag 'third'. This should remove t1->t3. | ||||||
|  |     delete_tag(repository, 'third') | ||||||
|  |     assert_deleted(repository, 't1', 't2', 't3') | ||||||
|  |     assert_not_deleted(repository, 'i1', 'i2', 'i3') | ||||||
|  | 
 | ||||||
|  |     # Current state: | ||||||
|  |     # fourth -> i3->i2->i1 | ||||||
|  | 
 | ||||||
|  |     # Add tag to i1. | ||||||
|  |     move_tag(repository, 'newtag', 'i1') | ||||||
|  |     assert_not_deleted(repository, 'i1', 'i2', 'i3') | ||||||
|  | 
 | ||||||
|  |     # Current state: | ||||||
|  |     # fourth -> i3->i2->i1 | ||||||
|  |     # newtag -> i1 | ||||||
|  | 
 | ||||||
|  |     # Delete tag 'fourth'. This should remove i2 and i3. | ||||||
|  |     delete_tag(repository, 'fourth') | ||||||
|  |     assert_deleted(repository, 'i2', 'i3') | ||||||
|  |     assert_not_deleted(repository, 'i1') | ||||||
|  | 
 | ||||||
|  |     # Current state: | ||||||
|  |     # newtag -> i1 | ||||||
|  | 
 | ||||||
|  |     # Delete tag 'newtag'. This should remove the remaining image. | ||||||
|  |     delete_tag(repository, 'newtag') | ||||||
|  |     assert_deleted(repository, 'i1') | ||||||
|  | 
 | ||||||
|  |     # Current state: | ||||||
|  |     # (Empty) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def test_empty_gc(default_tag_policy, initialized_db): | ||||||
|  |   with assert_gc_integrity(expect_storage_removed=False): | ||||||
|  |     repository = create_repository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'], | ||||||
|  |                                        third=['t1', 't2', 't3'], fourth=['i1', 'f1']) | ||||||
|  | 
 | ||||||
|  |     gc_now(repository) | ||||||
|  |     assert_not_deleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3', 'f1', 'f2') | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def test_time_machine_no_gc(default_tag_policy, initialized_db): | ||||||
|  |   """ Repository has two tags with shared images. Deleting the tag should not remove any images | ||||||
|  |   """ | ||||||
|  |   with assert_gc_integrity(expect_storage_removed=False): | ||||||
|  |     repository = create_repository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1']) | ||||||
|  |     _set_tag_expiration_policy(repository.namespace_user.username, 60*60*24) | ||||||
|  | 
 | ||||||
|  |     delete_tag(repository, 'latest') | ||||||
|  |     assert_not_deleted(repository, 'i2', 'i3') | ||||||
|  |     assert_not_deleted(repository, 'i1', 'f1') | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def test_time_machine_gc(default_tag_policy, initialized_db): | ||||||
|  |   """ Repository has two tags with shared images. Deleting the second tag should cause the images | ||||||
|  |       for the first deleted tag to gc. | ||||||
|  |   """ | ||||||
|  |   with assert_gc_integrity(): | ||||||
|  |     repository = create_repository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1']) | ||||||
|  | 
 | ||||||
|  |     _set_tag_expiration_policy(repository.namespace_user.username, 1) | ||||||
|  | 
 | ||||||
|  |     delete_tag(repository, 'latest') | ||||||
|  |     assert_not_deleted(repository, 'i2', 'i3') | ||||||
|  |     assert_not_deleted(repository, 'i1', 'f1') | ||||||
|  | 
 | ||||||
|  |     time.sleep(2) | ||||||
|  | 
 | ||||||
|  |     # This will cause the images associated with latest to gc | ||||||
|  |     delete_tag(repository, 'other') | ||||||
|  |     assert_deleted(repository, 'i2', 'i3') | ||||||
|  |     assert_not_deleted(repository, 'i1', 'f1') | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def test_images_shared_storage(default_tag_policy, initialized_db): | ||||||
|  |   """ Repository with two tags, both with the same shared storage. Deleting the first | ||||||
|  |       tag should delete the first image, but *not* its storage. | ||||||
|  |   """ | ||||||
|  |   with assert_gc_integrity(expect_storage_removed=False): | ||||||
|  |     repository = create_repository() | ||||||
|  | 
 | ||||||
|  |     # Add two tags, each with their own image, but with the same storage. | ||||||
|  |     image_storage = model.storage.create_v1_storage(storage.preferred_locations[0]) | ||||||
|  | 
 | ||||||
|  |     first_image = Image.create(docker_image_id='i1', | ||||||
|  |                                repository=repository, storage=image_storage, | ||||||
|  |                                ancestors='/') | ||||||
|  | 
 | ||||||
|  |     second_image = Image.create(docker_image_id='i2', | ||||||
|  |                                 repository=repository, storage=image_storage, | ||||||
|  |                                 ancestors='/') | ||||||
|  | 
 | ||||||
|  |     model.tag.store_tag_manifest(repository.namespace_user.username, repository.name, | ||||||
|  |                                  'first', first_image.docker_image_id, | ||||||
|  |                                  'sha:someshahere', '{}') | ||||||
|  | 
 | ||||||
|  |     model.tag.store_tag_manifest(repository.namespace_user.username, repository.name, | ||||||
|  |                                  'second', second_image.docker_image_id, | ||||||
|  |                                  'sha:someshahere', '{}') | ||||||
|  | 
 | ||||||
|  |     # Delete the first tag. | ||||||
|  |     delete_tag(repository, 'first') | ||||||
|  |     assert_deleted(repository, 'i1') | ||||||
|  |     assert_not_deleted(repository, 'i2') | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def test_image_with_cas(default_tag_policy, initialized_db): | ||||||
|  |   """ A repository with a tag pointing to an image backed by CAS. Deleting and GCing the tag | ||||||
|  |       should result in the storage and its CAS data being removed. | ||||||
|  |   """ | ||||||
|  |   with assert_gc_integrity(expect_storage_removed=True): | ||||||
|  |     repository = create_repository() | ||||||
|  | 
 | ||||||
|  |     # Create an image storage record under CAS. | ||||||
|  |     content = 'hello world' | ||||||
|  |     digest = 'sha256:' + hashlib.sha256(content).hexdigest() | ||||||
|  |     preferred = storage.preferred_locations[0] | ||||||
|  |     storage.put_content({preferred}, storage.blob_path(digest), content) | ||||||
|  | 
 | ||||||
|  |     image_storage = database.ImageStorage.create(content_checksum=digest, uploading=False) | ||||||
|  |     location = database.ImageStorageLocation.get(name=preferred) | ||||||
|  |     database.ImageStoragePlacement.create(location=location, storage=image_storage) | ||||||
|  | 
 | ||||||
|  |     # Ensure the CAS path exists. | ||||||
|  |     assert storage.exists({preferred}, storage.blob_path(digest)) | ||||||
|  | 
 | ||||||
|  |     # Create the image and the tag. | ||||||
|  |     first_image = Image.create(docker_image_id='i1', | ||||||
|  |                                repository=repository, storage=image_storage, | ||||||
|  |                                ancestors='/') | ||||||
|  | 
 | ||||||
|  |     model.tag.store_tag_manifest(repository.namespace_user.username, repository.name, | ||||||
|  |                                  'first', first_image.docker_image_id, | ||||||
|  |                                  'sha:someshahere1', '{}') | ||||||
|  | 
 | ||||||
|  |     assert_not_deleted(repository, 'i1') | ||||||
|  | 
 | ||||||
|  |     # Delete the tag. | ||||||
|  |     delete_tag(repository, 'first') | ||||||
|  |     assert_deleted(repository, 'i1') | ||||||
|  | 
 | ||||||
|  |     # Ensure the CAS path is gone. | ||||||
|  |     assert not storage.exists({preferred}, storage.blob_path(digest)) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def test_images_shared_cas(default_tag_policy, initialized_db): | ||||||
|  |   """ A repository, each two tags, pointing to the same image, which has image storage | ||||||
|  |       with the same *CAS path*, but *distinct records*. Deleting the first tag should delete the | ||||||
|  |       first image, and its storage, but not the file in storage, as it shares its CAS path. | ||||||
|  |   """ | ||||||
|  |   with assert_gc_integrity(expect_storage_removed=True): | ||||||
|  |     repository = create_repository() | ||||||
|  | 
 | ||||||
|  |     # Create two image storage records with the same content checksum. | ||||||
|  |     content = 'hello world' | ||||||
|  |     digest = 'sha256:' + hashlib.sha256(content).hexdigest() | ||||||
|  |     preferred = storage.preferred_locations[0] | ||||||
|  |     storage.put_content({preferred}, storage.blob_path(digest), content) | ||||||
|  | 
 | ||||||
|  |     is1 = database.ImageStorage.create(content_checksum=digest, uploading=False) | ||||||
|  |     is2 = database.ImageStorage.create(content_checksum=digest, uploading=False) | ||||||
|  | 
 | ||||||
|  |     location = database.ImageStorageLocation.get(name=preferred) | ||||||
|  | 
 | ||||||
|  |     database.ImageStoragePlacement.create(location=location, storage=is1) | ||||||
|  |     database.ImageStoragePlacement.create(location=location, storage=is2) | ||||||
|  | 
 | ||||||
|  |     # Ensure the CAS path exists. | ||||||
|  |     assert storage.exists({preferred}, storage.blob_path(digest)) | ||||||
|  | 
 | ||||||
|  |     # Create two images in the repository, and two tags, each pointing to one of the storages. | ||||||
|  |     first_image = Image.create(docker_image_id='i1', | ||||||
|  |                                repository=repository, storage=is1, | ||||||
|  |                                ancestors='/') | ||||||
|  | 
 | ||||||
|  |     second_image = Image.create(docker_image_id='i2', | ||||||
|  |                                 repository=repository, storage=is2, | ||||||
|  |                                 ancestors='/') | ||||||
|  | 
 | ||||||
|  |     model.tag.store_tag_manifest(repository.namespace_user.username, repository.name, | ||||||
|  |                                  'first', first_image.docker_image_id, | ||||||
|  |                                  'sha:someshahere1', '{}') | ||||||
|  | 
 | ||||||
|  |     model.tag.store_tag_manifest(repository.namespace_user.username, repository.name, | ||||||
|  |                                  'second', second_image.docker_image_id, | ||||||
|  |                                  'sha:someshahere2', '{}') | ||||||
|  | 
 | ||||||
|  |     assert_not_deleted(repository, 'i1', 'i2') | ||||||
|  | 
 | ||||||
|  |     # Delete the first tag. | ||||||
|  |     delete_tag(repository, 'first') | ||||||
|  |     assert_deleted(repository, 'i1') | ||||||
|  |     assert_not_deleted(repository, 'i2') | ||||||
|  | 
 | ||||||
|  |     # Ensure the CAS path still exists. | ||||||
|  |     assert storage.exists({preferred}, storage.blob_path(digest)) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def test_images_shared_cas_with_new_blob_table(default_tag_policy, initialized_db): | ||||||
|  |   """ A repository with a tag and image that shares its CAS path with a record in the new Blob | ||||||
|  |       table. Deleting the first tag should delete the first image, and its storage, but not the | ||||||
|  |       file in storage, as it shares its CAS path with the blob row. | ||||||
|  |   """ | ||||||
|  |   with assert_gc_integrity(expect_storage_removed=True): | ||||||
|  |     repository = create_repository() | ||||||
|  | 
 | ||||||
|  |     # Create two image storage records with the same content checksum. | ||||||
|  |     content = 'hello world' | ||||||
|  |     digest = 'sha256:' + hashlib.sha256(content).hexdigest() | ||||||
|  |     preferred = storage.preferred_locations[0] | ||||||
|  |     storage.put_content({preferred}, storage.blob_path(digest), content) | ||||||
|  | 
 | ||||||
|  |     media_type = database.MediaType.get(name='text/plain') | ||||||
|  | 
 | ||||||
|  |     is1 = database.ImageStorage.create(content_checksum=digest, uploading=False) | ||||||
|  |     database.Blob.create(digest=digest, size=0, media_type=media_type) | ||||||
|  | 
 | ||||||
|  |     location = database.ImageStorageLocation.get(name=preferred) | ||||||
|  |     database.ImageStoragePlacement.create(location=location, storage=is1) | ||||||
|  | 
 | ||||||
|  |     # Ensure the CAS path exists. | ||||||
|  |     assert storage.exists({preferred}, storage.blob_path(digest)) | ||||||
|  | 
 | ||||||
|  |     # Create the image in the repository, and the tag. | ||||||
|  |     first_image = Image.create(docker_image_id='i1', | ||||||
|  |                                repository=repository, storage=is1, | ||||||
|  |                                ancestors='/') | ||||||
|  | 
 | ||||||
|  |     model.tag.store_tag_manifest(repository.namespace_user.username, repository.name, | ||||||
|  |                                  'first', first_image.docker_image_id, | ||||||
|  |                                  'sha:someshahere1', '{}') | ||||||
|  | 
 | ||||||
|  |     assert_not_deleted(repository, 'i1') | ||||||
|  | 
 | ||||||
|  |     # Delete the tag. | ||||||
|  |     delete_tag(repository, 'first') | ||||||
|  |     assert_deleted(repository, 'i1') | ||||||
|  | 
 | ||||||
|  |     # Ensure the CAS path still exists, as it is referenced by the Blob table | ||||||
|  |     assert storage.exists({preferred}, storage.blob_path(digest)) | ||||||
							
								
								
									
										585
									
								
								test/test_gc.py
									
										
									
									
									
								
							
							
						
						
									
										585
									
								
								test/test_gc.py
									
										
									
									
									
								
							|  | @ -1,585 +0,0 @@ | ||||||
| import unittest |  | ||||||
| import time |  | ||||||
| import hashlib |  | ||||||
| 
 |  | ||||||
| from contextlib import contextmanager |  | ||||||
| from playhouse.test_utils import assert_query_count |  | ||||||
| 
 |  | ||||||
| from app import app, storage |  | ||||||
| from initdb import setup_database_for_testing, finished_database_for_testing |  | ||||||
| from data import model, database |  | ||||||
| from data.database import Image, ImageStorage, DerivedStorageForImage, Label, TagManifestLabel, Blob |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| ADMIN_ACCESS_USER = 'devtable' |  | ||||||
| PUBLIC_USER = 'public' |  | ||||||
| 
 |  | ||||||
| REPO = 'somerepo' |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| class TestGarbageCollection(unittest.TestCase): |  | ||||||
|   @staticmethod |  | ||||||
|   def _set_tag_expiration_policy(namespace, expiration_s): |  | ||||||
|     namespace_user = model.user.get_user(namespace) |  | ||||||
|     model.user.change_user_tag_expiration(namespace_user, expiration_s) |  | ||||||
| 
 |  | ||||||
|   def setUp(self): |  | ||||||
|     setup_database_for_testing(self) |  | ||||||
| 
 |  | ||||||
|     self._set_tag_expiration_policy(ADMIN_ACCESS_USER, 0) |  | ||||||
|     self._set_tag_expiration_policy(PUBLIC_USER, 0) |  | ||||||
| 
 |  | ||||||
|     self.app = app.test_client() |  | ||||||
|     self.ctx = app.test_request_context() |  | ||||||
|     self.ctx.__enter__() |  | ||||||
| 
 |  | ||||||
|   def tearDown(self): |  | ||||||
|     finished_database_for_testing(self) |  | ||||||
|     self.ctx.__exit__(True, None, None) |  | ||||||
| 
 |  | ||||||
|   @staticmethod |  | ||||||
|   def createImage(docker_image_id, repository_obj, username): |  | ||||||
|     preferred = storage.preferred_locations[0] |  | ||||||
|     image = model.image.find_create_or_link_image(docker_image_id, repository_obj, username, {}, |  | ||||||
|                                                   preferred) |  | ||||||
|     image.storage.uploading = False |  | ||||||
|     image.storage.save() |  | ||||||
| 
 |  | ||||||
|     # Create derived images as well. |  | ||||||
|     model.image.find_or_create_derived_storage(image, 'squash', preferred) |  | ||||||
|     model.image.find_or_create_derived_storage(image, 'aci', preferred) |  | ||||||
| 
 |  | ||||||
|     # Add some torrent info. |  | ||||||
|     try: |  | ||||||
|       database.TorrentInfo.get(storage=image.storage) |  | ||||||
|     except database.TorrentInfo.DoesNotExist: |  | ||||||
|       model.storage.save_torrent_info(image.storage, 1, 'helloworld') |  | ||||||
| 
 |  | ||||||
|     # Add some additional placements to the image. |  | ||||||
|     for location_name in ['local_eu']: |  | ||||||
|       location = database.ImageStorageLocation.get(name=location_name) |  | ||||||
| 
 |  | ||||||
|       try: |  | ||||||
|         database.ImageStoragePlacement.get(location=location, storage=image.storage) |  | ||||||
|       except: |  | ||||||
|         continue |  | ||||||
| 
 |  | ||||||
|       database.ImageStoragePlacement.create(location=location, storage=image.storage) |  | ||||||
| 
 |  | ||||||
|     return image.storage |  | ||||||
| 
 |  | ||||||
|   def createRepository(self, namespace=ADMIN_ACCESS_USER, name=REPO, **kwargs): |  | ||||||
|     user = model.user.get_user(namespace) |  | ||||||
|     repo = model.repository.create_repository(namespace, name, user) |  | ||||||
| 
 |  | ||||||
|     # Populate the repository with the tags. |  | ||||||
|     image_map = {} |  | ||||||
|     for tag_name in kwargs: |  | ||||||
|       image_ids = kwargs[tag_name] |  | ||||||
|       parent = None |  | ||||||
| 
 |  | ||||||
|       for image_id in image_ids: |  | ||||||
|         if not image_id in image_map: |  | ||||||
|           image_map[image_id] = self.createImage(image_id, repo, namespace) |  | ||||||
| 
 |  | ||||||
|         v1_metadata = { |  | ||||||
|           'id': image_id, |  | ||||||
|         } |  | ||||||
|         if parent is not None: |  | ||||||
|           v1_metadata['parent'] = parent.docker_image_id |  | ||||||
| 
 |  | ||||||
|         # Set the ancestors for the image. |  | ||||||
|         parent = model.image.set_image_metadata(image_id, namespace, name, '', '', '', v1_metadata, |  | ||||||
|                                                 parent=parent) |  | ||||||
| 
 |  | ||||||
|       # Set the tag for the image. |  | ||||||
|       tag_manifest, _ = model.tag.store_tag_manifest(namespace, name, tag_name, image_ids[-1], |  | ||||||
|                                                      'sha:someshahere', '{}') |  | ||||||
| 
 |  | ||||||
|       # Add some labels to the tag. |  | ||||||
|       model.label.create_manifest_label(tag_manifest, 'foo', 'bar', 'manifest') |  | ||||||
|       model.label.create_manifest_label(tag_manifest, 'meh', 'grah', 'manifest') |  | ||||||
| 
 |  | ||||||
|     return repo |  | ||||||
| 
 |  | ||||||
|   def gcNow(self, repository): |  | ||||||
|     self.assertTrue(model.repository.garbage_collect_repo(repository)) |  | ||||||
| 
 |  | ||||||
|   def deleteTag(self, repository, tag, perform_gc=True): |  | ||||||
|     model.tag.delete_tag(repository.namespace_user.username, repository.name, tag) |  | ||||||
|     if perform_gc: |  | ||||||
|       self.assertTrue(model.repository.garbage_collect_repo(repository)) |  | ||||||
| 
 |  | ||||||
|   def moveTag(self, repository, tag, docker_image_id): |  | ||||||
|     model.tag.create_or_update_tag(repository.namespace_user.username, repository.name, tag, |  | ||||||
|                                    docker_image_id) |  | ||||||
|     self.assertTrue(model.repository.garbage_collect_repo(repository)) |  | ||||||
| 
 |  | ||||||
|   def assertNotDeleted(self, repository, *args): |  | ||||||
|     for docker_image_id in args: |  | ||||||
|       self.assertTrue(bool(model.image.get_image_by_id(repository.namespace_user.username, |  | ||||||
|                                                        repository.name, docker_image_id))) |  | ||||||
| 
 |  | ||||||
|   def assertDeleted(self, repository, *args): |  | ||||||
|     for docker_image_id in args: |  | ||||||
|       try: |  | ||||||
|         # Verify the image is missing when accessed by the repository. |  | ||||||
|         model.image.get_image_by_id(repository.namespace_user.username, repository.name, |  | ||||||
|                                     docker_image_id) |  | ||||||
|       except model.DataModelException: |  | ||||||
|         return |  | ||||||
| 
 |  | ||||||
|       self.fail('Expected image %s to be deleted' % docker_image_id) |  | ||||||
| 
 |  | ||||||
|   @staticmethod |  | ||||||
|   def _get_dangling_storage_count(): |  | ||||||
|     storage_ids = set([current.id for current in ImageStorage.select()]) |  | ||||||
|     referenced_by_image = set([image.storage_id for image in Image.select()]) |  | ||||||
|     referenced_by_derived = set([derived.derivative_id |  | ||||||
|                                  for derived in DerivedStorageForImage.select()]) |  | ||||||
| 
 |  | ||||||
|     return len(storage_ids - referenced_by_image - referenced_by_derived) |  | ||||||
| 
 |  | ||||||
|   @staticmethod |  | ||||||
|   def _get_dangling_label_count(): |  | ||||||
|     label_ids = set([current.id for current in Label.select()]) |  | ||||||
|     referenced_by_manifest = set([mlabel.label_id for mlabel in TagManifestLabel.select()]) |  | ||||||
|     return len(label_ids - referenced_by_manifest) |  | ||||||
| 
 |  | ||||||
|   @contextmanager |  | ||||||
|   def assert_gc_integrity(self, expect_storage_removed=True): |  | ||||||
|     """ Specialized assertion for ensuring that GC cleans up all dangling storages |  | ||||||
|         and labels, invokes the callback for images removed and doesn't invoke the |  | ||||||
|         callback for images *not* removed. |  | ||||||
|     """ |  | ||||||
|     # TODO: Consider also asserting the number of DB queries being performed. |  | ||||||
| 
 |  | ||||||
|     # Add a callback for when images are removed. |  | ||||||
|     removed_image_storages = [] |  | ||||||
|     model.config.register_image_cleanup_callback(removed_image_storages.extend) |  | ||||||
| 
 |  | ||||||
|     # Store the number of dangling storages and labels. |  | ||||||
|     existing_storage_count = self._get_dangling_storage_count() |  | ||||||
|     existing_label_count = self._get_dangling_label_count() |  | ||||||
|     yield |  | ||||||
| 
 |  | ||||||
|     # Ensure the number of dangling storages and labels has not changed. |  | ||||||
|     updated_storage_count = self._get_dangling_storage_count() |  | ||||||
|     self.assertEqual(updated_storage_count, existing_storage_count) |  | ||||||
| 
 |  | ||||||
|     updated_label_count = self._get_dangling_label_count() |  | ||||||
|     self.assertEqual(updated_label_count, existing_label_count) |  | ||||||
| 
 |  | ||||||
|     # Ensure that for each call to the image+storage cleanup callback, the image and its |  | ||||||
|     # storage is not found *anywhere* in the database. |  | ||||||
|     for removed_image_and_storage in removed_image_storages: |  | ||||||
|       with self.assertRaises(Image.DoesNotExist): |  | ||||||
|         Image.get(id=removed_image_and_storage.id) |  | ||||||
| 
 |  | ||||||
|       with self.assertRaises(ImageStorage.DoesNotExist): |  | ||||||
|         ImageStorage.get(id=removed_image_and_storage.storage_id) |  | ||||||
| 
 |  | ||||||
|       with self.assertRaises(ImageStorage.DoesNotExist): |  | ||||||
|         ImageStorage.get(uuid=removed_image_and_storage.storage.uuid) |  | ||||||
| 
 |  | ||||||
|     self.assertEquals(expect_storage_removed, bool(removed_image_storages)) |  | ||||||
| 
 |  | ||||||
|     # Ensure all CAS storage is in the storage engine. |  | ||||||
|     preferred = storage.preferred_locations[0] |  | ||||||
|     for storage_row in ImageStorage.select(): |  | ||||||
|       if storage_row.cas_path: |  | ||||||
|         storage.get_content({preferred}, storage.blob_path(storage_row.content_checksum)) |  | ||||||
| 
 |  | ||||||
|     for blob_row in Blob.select(): |  | ||||||
|         storage.get_content({preferred}, storage.blob_path(blob_row.digest)) |  | ||||||
| 
 |  | ||||||
|   def test_has_garbage(self): |  | ||||||
|     """ Remove all existing repositories, then add one without garbage, check, then add one with |  | ||||||
|         garbage, and check again. |  | ||||||
|     """ |  | ||||||
|     # Delete all existing repos. |  | ||||||
|     for repo in database.Repository.select().order_by(database.Repository.id): |  | ||||||
|       self.assertTrue(model.repository.purge_repository(repo.namespace_user.username, repo.name)) |  | ||||||
| 
 |  | ||||||
|     # Change the time machine expiration on the namespace. |  | ||||||
|     (database.User |  | ||||||
|      .update(removed_tag_expiration_s=1000000000) |  | ||||||
|      .where(database.User.username == ADMIN_ACCESS_USER) |  | ||||||
|      .execute()) |  | ||||||
| 
 |  | ||||||
|     # Create a repository without any garbage. |  | ||||||
|     repository = self.createRepository(latest=['i1', 'i2', 'i3']) |  | ||||||
| 
 |  | ||||||
|     # Ensure that no repositories are returned by the has garbage check. |  | ||||||
|     self.assertIsNone(model.repository.find_repository_with_garbage(1000000000)) |  | ||||||
| 
 |  | ||||||
|     # Delete a tag. |  | ||||||
|     self.deleteTag(repository, 'latest', perform_gc=False) |  | ||||||
| 
 |  | ||||||
|     # There should still not be any repositories with garbage, due to time machine. |  | ||||||
|     self.assertIsNone(model.repository.find_repository_with_garbage(1000000000)) |  | ||||||
| 
 |  | ||||||
|     # Change the time machine expiration on the namespace. |  | ||||||
|     (database.User |  | ||||||
|      .update(removed_tag_expiration_s=0) |  | ||||||
|      .where(database.User.username == ADMIN_ACCESS_USER) |  | ||||||
|      .execute()) |  | ||||||
| 
 |  | ||||||
|     # Now we should find the repository for GC. |  | ||||||
|     repository = model.repository.find_repository_with_garbage(0) |  | ||||||
|     self.assertIsNotNone(repository) |  | ||||||
|     self.assertEquals(REPO, repository.name) |  | ||||||
| 
 |  | ||||||
|     # GC the repository. |  | ||||||
|     self.assertTrue(model.repository.garbage_collect_repo(repository)) |  | ||||||
| 
 |  | ||||||
|     # There should now be no repositories with garbage. |  | ||||||
|     self.assertIsNone(model.repository.find_repository_with_garbage(0)) |  | ||||||
| 
 |  | ||||||
|   def test_find_garbage_policy_functions(self): |  | ||||||
|     with assert_query_count(1): |  | ||||||
|       one_policy = model.repository.get_random_gc_policy() |  | ||||||
|       all_policies = model.repository._get_gc_expiration_policies() |  | ||||||
|     self.assertIn(one_policy, all_policies) |  | ||||||
| 
 |  | ||||||
|   def test_one_tag(self): |  | ||||||
|     """ Create a repository with a single tag, then remove that tag and verify that the repository |  | ||||||
|         is now empty. """ |  | ||||||
|     with self.assert_gc_integrity(): |  | ||||||
|       repository = self.createRepository(latest=['i1', 'i2', 'i3']) |  | ||||||
|       self.deleteTag(repository, 'latest') |  | ||||||
|       self.assertDeleted(repository, 'i1', 'i2', 'i3') |  | ||||||
| 
 |  | ||||||
|   def test_two_tags_unshared_images(self): |  | ||||||
|     """ Repository has two tags with no shared images between them. """ |  | ||||||
|     with self.assert_gc_integrity(): |  | ||||||
|       repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['f1', 'f2']) |  | ||||||
|       self.deleteTag(repository, 'latest') |  | ||||||
|       self.assertDeleted(repository, 'i1', 'i2', 'i3') |  | ||||||
|       self.assertNotDeleted(repository, 'f1', 'f2') |  | ||||||
| 
 |  | ||||||
|   def test_two_tags_shared_images(self): |  | ||||||
|     """ Repository has two tags with shared images. Deleting the tag should only remove the |  | ||||||
|         unshared images. |  | ||||||
|     """ |  | ||||||
|     with self.assert_gc_integrity(): |  | ||||||
|       repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1']) |  | ||||||
|       self.deleteTag(repository, 'latest') |  | ||||||
|       self.assertDeleted(repository, 'i2', 'i3') |  | ||||||
|       self.assertNotDeleted(repository, 'i1', 'f1') |  | ||||||
| 
 |  | ||||||
|   def test_unrelated_repositories(self): |  | ||||||
|     """ Two repositories with different images. Removing the tag from one leaves the other's |  | ||||||
|         images intact. |  | ||||||
|     """ |  | ||||||
|     with self.assert_gc_integrity(): |  | ||||||
|       repository1 = self.createRepository(latest=['i1', 'i2', 'i3'], name='repo1') |  | ||||||
|       repository2 = self.createRepository(latest=['j1', 'j2', 'j3'], name='repo2') |  | ||||||
| 
 |  | ||||||
|       self.deleteTag(repository1, 'latest') |  | ||||||
| 
 |  | ||||||
|       self.assertDeleted(repository1, 'i1', 'i2', 'i3') |  | ||||||
|       self.assertNotDeleted(repository2, 'j1', 'j2', 'j3') |  | ||||||
| 
 |  | ||||||
|   def test_related_repositories(self): |  | ||||||
|     """ Two repositories with shared images. Removing the tag from one leaves the other's |  | ||||||
|         images intact. |  | ||||||
|     """ |  | ||||||
|     with self.assert_gc_integrity(): |  | ||||||
|       repository1 = self.createRepository(latest=['i1', 'i2', 'i3'], name='repo1') |  | ||||||
|       repository2 = self.createRepository(latest=['i1', 'i2', 'j1'], name='repo2') |  | ||||||
| 
 |  | ||||||
|       self.deleteTag(repository1, 'latest') |  | ||||||
| 
 |  | ||||||
|       self.assertDeleted(repository1, 'i3') |  | ||||||
|       self.assertNotDeleted(repository2, 'i1', 'i2', 'j1') |  | ||||||
| 
 |  | ||||||
|   def test_inaccessible_repositories(self): |  | ||||||
|     """ Two repositories under different namespaces should result in the images being deleted |  | ||||||
|         but not completely removed from the database. |  | ||||||
|     """ |  | ||||||
|     with self.assert_gc_integrity(): |  | ||||||
|       repository1 = self.createRepository(namespace=ADMIN_ACCESS_USER, latest=['i1', 'i2', 'i3']) |  | ||||||
|       repository2 = self.createRepository(namespace=PUBLIC_USER, latest=['i1', 'i2', 'i3']) |  | ||||||
| 
 |  | ||||||
|       self.deleteTag(repository1, 'latest') |  | ||||||
|       self.assertDeleted(repository1, 'i1', 'i2', 'i3') |  | ||||||
|       self.assertNotDeleted(repository2, 'i1', 'i2', 'i3') |  | ||||||
| 
 |  | ||||||
|   def test_multiple_shared_images(self): |  | ||||||
|     """ Repository has multiple tags with shared images. Selectively deleting the tags, and |  | ||||||
|         verifying at each step. |  | ||||||
|     """ |  | ||||||
|     with self.assert_gc_integrity(): |  | ||||||
|       repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'], |  | ||||||
|                                          third=['t1', 't2', 't3'], fourth=['i1', 'f1']) |  | ||||||
| 
 |  | ||||||
|       # Current state: |  | ||||||
|       # latest -> i3->i2->i1 |  | ||||||
|       # other -> f2->f1->i1 |  | ||||||
|       # third -> t3->t2->t1 |  | ||||||
|       # fourth -> f1->i1 |  | ||||||
| 
 |  | ||||||
|       # Delete tag other. Should delete f2, since it is not shared. |  | ||||||
|       self.deleteTag(repository, 'other') |  | ||||||
|       self.assertDeleted(repository, 'f2') |  | ||||||
|       self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3', 'f1') |  | ||||||
| 
 |  | ||||||
|       # Current state: |  | ||||||
|       # latest -> i3->i2->i1 |  | ||||||
|       # third -> t3->t2->t1 |  | ||||||
|       # fourth -> f1->i1 |  | ||||||
| 
 |  | ||||||
|       # Move tag fourth to i3. This should remove f1 since it is no longer referenced. |  | ||||||
|       self.moveTag(repository, 'fourth', 'i3') |  | ||||||
|       self.assertDeleted(repository, 'f1') |  | ||||||
|       self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3') |  | ||||||
| 
 |  | ||||||
|       # Current state: |  | ||||||
|       # latest -> i3->i2->i1 |  | ||||||
|       # third -> t3->t2->t1 |  | ||||||
|       # fourth -> i3->i2->i1 |  | ||||||
| 
 |  | ||||||
|       # Delete tag 'latest'. This should do nothing since fourth is on the same branch. |  | ||||||
|       self.deleteTag(repository, 'latest') |  | ||||||
|       self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3') |  | ||||||
| 
 |  | ||||||
|       # Current state: |  | ||||||
|       # third -> t3->t2->t1 |  | ||||||
|       # fourth -> i3->i2->i1 |  | ||||||
| 
 |  | ||||||
|       # Delete tag 'third'. This should remove t1->t3. |  | ||||||
|       self.deleteTag(repository, 'third') |  | ||||||
|       self.assertDeleted(repository, 't1', 't2', 't3') |  | ||||||
|       self.assertNotDeleted(repository, 'i1', 'i2', 'i3') |  | ||||||
| 
 |  | ||||||
|       # Current state: |  | ||||||
|       # fourth -> i3->i2->i1 |  | ||||||
| 
 |  | ||||||
|       # Add tag to i1. |  | ||||||
|       self.moveTag(repository, 'newtag', 'i1') |  | ||||||
|       self.assertNotDeleted(repository, 'i1', 'i2', 'i3') |  | ||||||
| 
 |  | ||||||
|       # Current state: |  | ||||||
|       # fourth -> i3->i2->i1 |  | ||||||
|       # newtag -> i1 |  | ||||||
| 
 |  | ||||||
|       # Delete tag 'fourth'. This should remove i2 and i3. |  | ||||||
|       self.deleteTag(repository, 'fourth') |  | ||||||
|       self.assertDeleted(repository, 'i2', 'i3') |  | ||||||
|       self.assertNotDeleted(repository, 'i1') |  | ||||||
| 
 |  | ||||||
|       # Current state: |  | ||||||
|       # newtag -> i1 |  | ||||||
| 
 |  | ||||||
|       # Delete tag 'newtag'. This should remove the remaining image. |  | ||||||
|       self.deleteTag(repository, 'newtag') |  | ||||||
|       self.assertDeleted(repository, 'i1') |  | ||||||
| 
 |  | ||||||
|       # Current state: |  | ||||||
|       # (Empty) |  | ||||||
| 
 |  | ||||||
|   def test_empty_gc(self): |  | ||||||
|     with self.assert_gc_integrity(expect_storage_removed=False): |  | ||||||
|       repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1', 'f2'], |  | ||||||
|                                          third=['t1', 't2', 't3'], fourth=['i1', 'f1']) |  | ||||||
| 
 |  | ||||||
|       self.gcNow(repository) |  | ||||||
|       self.assertNotDeleted(repository, 'i1', 'i2', 'i3', 't1', 't2', 't3', 'f1', 'f2') |  | ||||||
| 
 |  | ||||||
|   def test_time_machine_no_gc(self): |  | ||||||
|     """ Repository has two tags with shared images. Deleting the tag should not remove any images |  | ||||||
|     """ |  | ||||||
|     with self.assert_gc_integrity(expect_storage_removed=False): |  | ||||||
|       repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1']) |  | ||||||
|       self._set_tag_expiration_policy(repository.namespace_user.username, 60*60*24) |  | ||||||
| 
 |  | ||||||
|       self.deleteTag(repository, 'latest') |  | ||||||
|       self.assertNotDeleted(repository, 'i2', 'i3') |  | ||||||
|       self.assertNotDeleted(repository, 'i1', 'f1') |  | ||||||
| 
 |  | ||||||
|   def test_time_machine_gc(self): |  | ||||||
|     """ Repository has two tags with shared images. Deleting the second tag should cause the images |  | ||||||
|         for the first deleted tag to gc. |  | ||||||
|     """ |  | ||||||
|     with self.assert_gc_integrity(): |  | ||||||
|       repository = self.createRepository(latest=['i1', 'i2', 'i3'], other=['i1', 'f1']) |  | ||||||
| 
 |  | ||||||
|       self._set_tag_expiration_policy(repository.namespace_user.username, 1) |  | ||||||
| 
 |  | ||||||
|       self.deleteTag(repository, 'latest') |  | ||||||
|       self.assertNotDeleted(repository, 'i2', 'i3') |  | ||||||
|       self.assertNotDeleted(repository, 'i1', 'f1') |  | ||||||
| 
 |  | ||||||
|       time.sleep(2) |  | ||||||
| 
 |  | ||||||
|       self.deleteTag(repository, 'other')  # This will cause the images associated with latest to gc |  | ||||||
|       self.assertDeleted(repository, 'i2', 'i3') |  | ||||||
|       self.assertNotDeleted(repository, 'i1', 'f1') |  | ||||||
| 
 |  | ||||||
|   def test_images_shared_storage(self): |  | ||||||
|     """ Repository with two tags, both with the same shared storage. Deleting the first |  | ||||||
|         tag should delete the first image, but *not* its storage. |  | ||||||
|     """ |  | ||||||
|     with self.assert_gc_integrity(expect_storage_removed=False): |  | ||||||
|       repository = self.createRepository() |  | ||||||
| 
 |  | ||||||
|       # Add two tags, each with their own image, but with the same storage. |  | ||||||
|       image_storage = model.storage.create_v1_storage(storage.preferred_locations[0]) |  | ||||||
| 
 |  | ||||||
|       first_image = Image.create(docker_image_id='i1', |  | ||||||
|                                  repository=repository, storage=image_storage, |  | ||||||
|                                  ancestors='/') |  | ||||||
| 
 |  | ||||||
|       second_image = Image.create(docker_image_id='i2', |  | ||||||
|                                   repository=repository, storage=image_storage, |  | ||||||
|                                   ancestors='/') |  | ||||||
| 
 |  | ||||||
|       model.tag.store_tag_manifest(repository.namespace_user.username, repository.name, |  | ||||||
|                                    'first', first_image.docker_image_id, |  | ||||||
|                                    'sha:someshahere', '{}') |  | ||||||
| 
 |  | ||||||
|       model.tag.store_tag_manifest(repository.namespace_user.username, repository.name, |  | ||||||
|                                    'second', second_image.docker_image_id, |  | ||||||
|                                    'sha:someshahere', '{}') |  | ||||||
| 
 |  | ||||||
|       # Delete the first tag. |  | ||||||
|       self.deleteTag(repository, 'first') |  | ||||||
|       self.assertDeleted(repository, 'i1') |  | ||||||
|       self.assertNotDeleted(repository, 'i2') |  | ||||||
| 
 |  | ||||||
|   def test_image_with_cas(self): |  | ||||||
|     """ A repository with a tag pointing to an image backed by CAS. Deleting and GCing the tag |  | ||||||
|         should result in the storage and its CAS data being removed. |  | ||||||
|     """ |  | ||||||
|     with self.assert_gc_integrity(expect_storage_removed=True): |  | ||||||
|       repository = self.createRepository() |  | ||||||
| 
 |  | ||||||
|       # Create an image storage record under CAS. |  | ||||||
|       content = 'hello world' |  | ||||||
|       digest = 'sha256:' + hashlib.sha256(content).hexdigest() |  | ||||||
|       preferred = storage.preferred_locations[0] |  | ||||||
|       storage.put_content({preferred}, storage.blob_path(digest), content) |  | ||||||
| 
 |  | ||||||
|       image_storage = database.ImageStorage.create(content_checksum=digest, uploading=False) |  | ||||||
|       location = database.ImageStorageLocation.get(name=preferred) |  | ||||||
|       database.ImageStoragePlacement.create(location=location, storage=image_storage) |  | ||||||
| 
 |  | ||||||
|       # Ensure the CAS path exists. |  | ||||||
|       self.assertTrue(storage.exists({preferred}, storage.blob_path(digest))) |  | ||||||
| 
 |  | ||||||
|       # Create the image and the tag. |  | ||||||
|       first_image = Image.create(docker_image_id='i1', |  | ||||||
|                                  repository=repository, storage=image_storage, |  | ||||||
|                                  ancestors='/') |  | ||||||
| 
 |  | ||||||
|       model.tag.store_tag_manifest(repository.namespace_user.username, repository.name, |  | ||||||
|                                    'first', first_image.docker_image_id, |  | ||||||
|                                    'sha:someshahere1', '{}') |  | ||||||
| 
 |  | ||||||
|       self.assertNotDeleted(repository, 'i1') |  | ||||||
| 
 |  | ||||||
|       # Delete the tag. |  | ||||||
|       self.deleteTag(repository, 'first') |  | ||||||
|       self.assertDeleted(repository, 'i1') |  | ||||||
| 
 |  | ||||||
|       # Ensure the CAS path is gone. |  | ||||||
|       self.assertFalse(storage.exists({preferred}, storage.blob_path(digest))) |  | ||||||
| 
 |  | ||||||
|   def test_images_shared_cas(self): |  | ||||||
|     """ A repository, each two tags, pointing to the same image, which has image storage |  | ||||||
|         with the same *CAS path*, but *distinct records*. Deleting the first tag should delete the |  | ||||||
|         first image, and its storage, but not the file in storage, as it shares its CAS path. |  | ||||||
|     """ |  | ||||||
|     with self.assert_gc_integrity(expect_storage_removed=True): |  | ||||||
|       repository = self.createRepository() |  | ||||||
| 
 |  | ||||||
|       # Create two image storage records with the same content checksum. |  | ||||||
|       content = 'hello world' |  | ||||||
|       digest = 'sha256:' + hashlib.sha256(content).hexdigest() |  | ||||||
|       preferred = storage.preferred_locations[0] |  | ||||||
|       storage.put_content({preferred}, storage.blob_path(digest), content) |  | ||||||
| 
 |  | ||||||
|       is1 = database.ImageStorage.create(content_checksum=digest, uploading=False) |  | ||||||
|       is2 = database.ImageStorage.create(content_checksum=digest, uploading=False) |  | ||||||
| 
 |  | ||||||
|       location = database.ImageStorageLocation.get(name=preferred) |  | ||||||
| 
 |  | ||||||
|       database.ImageStoragePlacement.create(location=location, storage=is1) |  | ||||||
|       database.ImageStoragePlacement.create(location=location, storage=is2) |  | ||||||
| 
 |  | ||||||
|       # Ensure the CAS path exists. |  | ||||||
|       self.assertTrue(storage.exists({preferred}, storage.blob_path(digest))) |  | ||||||
| 
 |  | ||||||
|       # Create two images in the repository, and two tags, each pointing to one of the storages. |  | ||||||
|       first_image = Image.create(docker_image_id='i1', |  | ||||||
|                                  repository=repository, storage=is1, |  | ||||||
|                                  ancestors='/') |  | ||||||
| 
 |  | ||||||
|       second_image = Image.create(docker_image_id='i2', |  | ||||||
|                                   repository=repository, storage=is2, |  | ||||||
|                                   ancestors='/') |  | ||||||
| 
 |  | ||||||
|       model.tag.store_tag_manifest(repository.namespace_user.username, repository.name, |  | ||||||
|                                    'first', first_image.docker_image_id, |  | ||||||
|                                    'sha:someshahere1', '{}') |  | ||||||
| 
 |  | ||||||
|       model.tag.store_tag_manifest(repository.namespace_user.username, repository.name, |  | ||||||
|                                    'second', second_image.docker_image_id, |  | ||||||
|                                    'sha:someshahere2', '{}') |  | ||||||
| 
 |  | ||||||
|       self.assertNotDeleted(repository, 'i1', 'i2') |  | ||||||
| 
 |  | ||||||
|       # Delete the first tag. |  | ||||||
|       self.deleteTag(repository, 'first') |  | ||||||
|       self.assertDeleted(repository, 'i1') |  | ||||||
|       self.assertNotDeleted(repository, 'i2') |  | ||||||
| 
 |  | ||||||
|       # Ensure the CAS path still exists. |  | ||||||
|       self.assertTrue(storage.exists({preferred}, storage.blob_path(digest))) |  | ||||||
| 
 |  | ||||||
|   def test_images_shared_cas_with_new_blob_table(self): |  | ||||||
|     """ A repository with a tag and image that shares its CAS path with a record in the new Blob |  | ||||||
|         table. Deleting the first tag should delete the first image, and its storage, but not the |  | ||||||
|         file in storage, as it shares its CAS path with the blob row. |  | ||||||
|     """ |  | ||||||
|     with self.assert_gc_integrity(expect_storage_removed=True): |  | ||||||
|       repository = self.createRepository() |  | ||||||
| 
 |  | ||||||
|       # Create two image storage records with the same content checksum. |  | ||||||
|       content = 'hello world' |  | ||||||
|       digest = 'sha256:' + hashlib.sha256(content).hexdigest() |  | ||||||
|       preferred = storage.preferred_locations[0] |  | ||||||
|       storage.put_content({preferred}, storage.blob_path(digest), content) |  | ||||||
| 
 |  | ||||||
|       media_type = database.MediaType.get(name='text/plain') |  | ||||||
| 
 |  | ||||||
|       is1 = database.ImageStorage.create(content_checksum=digest, uploading=False) |  | ||||||
|       is2 = database.Blob.create(digest=digest, size=0, media_type=media_type) |  | ||||||
| 
 |  | ||||||
|       location = database.ImageStorageLocation.get(name=preferred) |  | ||||||
|       database.ImageStoragePlacement.create(location=location, storage=is1) |  | ||||||
| 
 |  | ||||||
|       # Ensure the CAS path exists. |  | ||||||
|       self.assertTrue(storage.exists({preferred}, storage.blob_path(digest))) |  | ||||||
| 
 |  | ||||||
|       # Create the image in the repository, and the tag. |  | ||||||
|       first_image = Image.create(docker_image_id='i1', |  | ||||||
|                                  repository=repository, storage=is1, |  | ||||||
|                                  ancestors='/') |  | ||||||
| 
 |  | ||||||
|       model.tag.store_tag_manifest(repository.namespace_user.username, repository.name, |  | ||||||
|                                    'first', first_image.docker_image_id, |  | ||||||
|                                    'sha:someshahere1', '{}') |  | ||||||
| 
 |  | ||||||
|       self.assertNotDeleted(repository, 'i1') |  | ||||||
| 
 |  | ||||||
|       # Delete the tag. |  | ||||||
|       self.deleteTag(repository, 'first') |  | ||||||
|       self.assertDeleted(repository, 'i1') |  | ||||||
| 
 |  | ||||||
|       # Ensure the CAS path still exists, as it is referenced by the Blob table. |  | ||||||
|       self.assertTrue(storage.exists({preferred}, storage.blob_path(digest))) |  | ||||||
| 
 |  | ||||||
| if __name__ == '__main__': |  | ||||||
|   unittest.main() |  | ||||||
		Reference in a new issue