Fix GC handling around CAS paths
Adds code to ensure we never GC CAS paths that are shared amongst multiple ImageStorage rows, as well as an associated pair of tests to catch the positive and negative cases.
This commit is contained in:
parent
16ccc946f3
commit
69e550d125
2 changed files with 129 additions and 5 deletions
|
@ -1,5 +1,6 @@
|
|||
import unittest
|
||||
import time
|
||||
import hashlib
|
||||
|
||||
from contextlib import contextmanager
|
||||
from playhouse.test_utils import assert_query_count
|
||||
|
@ -183,6 +184,12 @@ class TestGarbageCollection(unittest.TestCase):
|
|||
|
||||
self.assertEquals(expect_storage_removed, bool(removed_image_storages))
|
||||
|
||||
# Ensure all CAS storage is in the storage engine.
|
||||
preferred = storage.preferred_locations[0]
|
||||
for storage_row in ImageStorage.select():
|
||||
if storage_row.cas_path:
|
||||
storage.get_content({preferred}, storage.blob_path(storage_row.content_checksum))
|
||||
|
||||
def test_has_garbage(self):
|
||||
""" Remove all existing repositories, then add one without garbage, check, then add one with
|
||||
garbage, and check again.
|
||||
|
@ -405,6 +412,97 @@ class TestGarbageCollection(unittest.TestCase):
|
|||
self.assertDeleted(repository, 'i1')
|
||||
self.assertNotDeleted(repository, 'i2')
|
||||
|
||||
def test_image_with_cas(self):
|
||||
""" A repository with a tag pointing to an image backed by CAS. Deleting and GCing the tag
|
||||
should result in the storage and its CAS data being removed.
|
||||
"""
|
||||
with self.assert_gc_integrity(expect_storage_removed=True):
|
||||
repository = self.createRepository()
|
||||
|
||||
# Create an image storage record under CAS.
|
||||
content = 'hello world'
|
||||
digest = 'sha256:' + hashlib.sha256(content).hexdigest()
|
||||
preferred = storage.preferred_locations[0]
|
||||
storage.put_content({preferred}, storage.blob_path(digest), content)
|
||||
|
||||
image_storage = database.ImageStorage.create(content_checksum=digest, uploading=False)
|
||||
location = database.ImageStorageLocation.get(name=preferred)
|
||||
database.ImageStoragePlacement.create(location=location, storage=image_storage)
|
||||
|
||||
# Ensure the CAS path exists.
|
||||
self.assertTrue(storage.exists({preferred}, storage.blob_path(digest)))
|
||||
|
||||
# Create the image and the tag.
|
||||
first_image = Image.create(docker_image_id='i1',
|
||||
repository=repository, storage=image_storage,
|
||||
ancestors='/')
|
||||
|
||||
model.tag.store_tag_manifest(repository.namespace_user.username, repository.name,
|
||||
'first', first_image.docker_image_id,
|
||||
'sha:someshahere1', '{}')
|
||||
|
||||
self.assertNotDeleted(repository, 'i1')
|
||||
|
||||
# Delete the tag.
|
||||
self.deleteTag(repository, 'first')
|
||||
self.assertDeleted(repository, 'i1')
|
||||
|
||||
# Ensure the CAS path is gone.
|
||||
self.assertFalse(storage.exists({preferred}, storage.blob_path(digest)))
|
||||
|
||||
def test_images_shared_cas(self):
|
||||
""" A repository, each two tags, pointing to the same image, which has image storage
|
||||
with the same *CAS path*, but *distinct records*. Deleting the first tag should delete the
|
||||
first image, and its storage, but not the file in storage, as it shares its CAS path.
|
||||
"""
|
||||
with self.assert_gc_integrity(expect_storage_removed=True):
|
||||
repository = self.createRepository()
|
||||
|
||||
# Create two image storage records with the same content checksum.
|
||||
content = 'hello world'
|
||||
digest = 'sha256:' + hashlib.sha256(content).hexdigest()
|
||||
preferred = storage.preferred_locations[0]
|
||||
storage.put_content({preferred}, storage.blob_path(digest), content)
|
||||
|
||||
is1 = database.ImageStorage.create(content_checksum=digest, uploading=False)
|
||||
is2 = database.ImageStorage.create(content_checksum=digest, uploading=False)
|
||||
|
||||
location = database.ImageStorageLocation.get(name=preferred)
|
||||
|
||||
database.ImageStoragePlacement.create(location=location, storage=is1)
|
||||
database.ImageStoragePlacement.create(location=location, storage=is2)
|
||||
|
||||
# Ensure the CAS path exists.
|
||||
self.assertTrue(storage.exists({preferred}, storage.blob_path(digest)))
|
||||
|
||||
# Create two images in the repository, and two tags, each pointing to one of the storages.
|
||||
first_image = Image.create(docker_image_id='i1',
|
||||
repository=repository, storage=is1,
|
||||
ancestors='/')
|
||||
|
||||
second_image = Image.create(docker_image_id='i2',
|
||||
repository=repository, storage=is2,
|
||||
ancestors='/')
|
||||
|
||||
model.tag.store_tag_manifest(repository.namespace_user.username, repository.name,
|
||||
'first', first_image.docker_image_id,
|
||||
'sha:someshahere1', '{}')
|
||||
|
||||
model.tag.store_tag_manifest(repository.namespace_user.username, repository.name,
|
||||
'second', second_image.docker_image_id,
|
||||
'sha:someshahere2', '{}')
|
||||
|
||||
self.assertNotDeleted(repository, 'i1', 'i2')
|
||||
|
||||
# Delete the first tag.
|
||||
self.deleteTag(repository, 'first')
|
||||
self.assertDeleted(repository, 'i1')
|
||||
self.assertNotDeleted(repository, 'i2')
|
||||
|
||||
# Ensure the CAS path still exists.
|
||||
self.assertTrue(storage.exists({preferred}, storage.blob_path(digest)))
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
Reference in a new issue