Make sure GC checks new Blob table as well before deleting CAS storage
This commit is contained in:
parent
3d0e63d8e5
commit
bdda74d6df
2 changed files with 60 additions and 7 deletions
|
@ -8,7 +8,7 @@ from data.model import (config, db_transaction, InvalidImageException, TorrentIn
|
||||||
DataModelException, _basequery)
|
DataModelException, _basequery)
|
||||||
from data.database import (ImageStorage, Image, ImageStoragePlacement, ImageStorageLocation,
|
from data.database import (ImageStorage, Image, ImageStoragePlacement, ImageStorageLocation,
|
||||||
ImageStorageTransformation, ImageStorageSignature,
|
ImageStorageTransformation, ImageStorageSignature,
|
||||||
ImageStorageSignatureKind, Repository, Namespace, TorrentInfo,
|
ImageStorageSignatureKind, Repository, Namespace, TorrentInfo, Blob,
|
||||||
ensure_under_transaction)
|
ensure_under_transaction)
|
||||||
|
|
||||||
|
|
||||||
|
@ -95,15 +95,24 @@ def garbage_collect_storage(storage_id_whitelist):
|
||||||
|
|
||||||
unreferenced_checksums = set()
|
unreferenced_checksums = set()
|
||||||
if content_checksums:
|
if content_checksums:
|
||||||
|
# Check the current image storage.
|
||||||
query = (ImageStorage
|
query = (ImageStorage
|
||||||
.select(ImageStorage.content_checksum)
|
.select(ImageStorage.content_checksum)
|
||||||
.where(ImageStorage.content_checksum << list(content_checksums)))
|
.where(ImageStorage.content_checksum << list(content_checksums)))
|
||||||
referenced_checksums = set([image_storage.content_checksum for image_storage in query])
|
is_referenced_checksums = set([image_storage.content_checksum for image_storage in query])
|
||||||
if referenced_checksums:
|
if is_referenced_checksums:
|
||||||
logger.warning('GC attempted to remove CAS checksums %s, which are still referenced',
|
logger.warning('GC attempted to remove CAS checksums %s, which are still IS referenced',
|
||||||
referenced_checksums)
|
is_referenced_checksums)
|
||||||
|
|
||||||
unreferenced_checksums = content_checksums - referenced_checksums
|
# Check the new Blob table as well.
|
||||||
|
query = Blob.select(Blob.digest).where(Blob.digest << list(content_checksums))
|
||||||
|
blob_referenced_checksums = set([blob.digest for blob in query])
|
||||||
|
if blob_referenced_checksums:
|
||||||
|
logger.warning('GC attempted to remove CAS checksums %s, which are still Blob referenced',
|
||||||
|
blob_referenced_checksums)
|
||||||
|
|
||||||
|
unreferenced_checksums = (content_checksums - blob_referenced_checksums -
|
||||||
|
is_referenced_checksums)
|
||||||
|
|
||||||
# Return all placements for all image storages found not at a CAS path or with a content
|
# Return all placements for all image storages found not at a CAS path or with a content
|
||||||
# checksum that is referenced.
|
# checksum that is referenced.
|
||||||
|
|
|
@ -8,7 +8,7 @@ from playhouse.test_utils import assert_query_count
|
||||||
from app import app, storage
|
from app import app, storage
|
||||||
from initdb import setup_database_for_testing, finished_database_for_testing
|
from initdb import setup_database_for_testing, finished_database_for_testing
|
||||||
from data import model, database
|
from data import model, database
|
||||||
from data.database import Image, ImageStorage, DerivedStorageForImage, Label, TagManifestLabel
|
from data.database import Image, ImageStorage, DerivedStorageForImage, Label, TagManifestLabel, Blob
|
||||||
|
|
||||||
|
|
||||||
ADMIN_ACCESS_USER = 'devtable'
|
ADMIN_ACCESS_USER = 'devtable'
|
||||||
|
@ -190,6 +190,9 @@ class TestGarbageCollection(unittest.TestCase):
|
||||||
if storage_row.cas_path:
|
if storage_row.cas_path:
|
||||||
storage.get_content({preferred}, storage.blob_path(storage_row.content_checksum))
|
storage.get_content({preferred}, storage.blob_path(storage_row.content_checksum))
|
||||||
|
|
||||||
|
for blob_row in Blob.select():
|
||||||
|
storage.get_content({preferred}, storage.blob_path(blob_row.digest))
|
||||||
|
|
||||||
def test_has_garbage(self):
|
def test_has_garbage(self):
|
||||||
""" Remove all existing repositories, then add one without garbage, check, then add one with
|
""" Remove all existing repositories, then add one without garbage, check, then add one with
|
||||||
garbage, and check again.
|
garbage, and check again.
|
||||||
|
@ -502,7 +505,48 @@ class TestGarbageCollection(unittest.TestCase):
|
||||||
# Ensure the CAS path still exists.
|
# Ensure the CAS path still exists.
|
||||||
self.assertTrue(storage.exists({preferred}, storage.blob_path(digest)))
|
self.assertTrue(storage.exists({preferred}, storage.blob_path(digest)))
|
||||||
|
|
||||||
|
def test_images_shared_cas_with_new_blob_table(self):
|
||||||
|
""" A repository with a tag and image that shares its CAS path with a record in the new Blob
|
||||||
|
table. Deleting the first tag should delete the first image, and its storage, but not the
|
||||||
|
file in storage, as it shares its CAS path with the blob row.
|
||||||
|
"""
|
||||||
|
with self.assert_gc_integrity(expect_storage_removed=True):
|
||||||
|
repository = self.createRepository()
|
||||||
|
|
||||||
|
# Create two image storage records with the same content checksum.
|
||||||
|
content = 'hello world'
|
||||||
|
digest = 'sha256:' + hashlib.sha256(content).hexdigest()
|
||||||
|
preferred = storage.preferred_locations[0]
|
||||||
|
storage.put_content({preferred}, storage.blob_path(digest), content)
|
||||||
|
|
||||||
|
media_type = database.MediaType.get(name='text/plain')
|
||||||
|
|
||||||
|
is1 = database.ImageStorage.create(content_checksum=digest, uploading=False)
|
||||||
|
is2 = database.Blob.create(digest=digest, size=0, media_type=media_type)
|
||||||
|
|
||||||
|
location = database.ImageStorageLocation.get(name=preferred)
|
||||||
|
database.ImageStoragePlacement.create(location=location, storage=is1)
|
||||||
|
|
||||||
|
# Ensure the CAS path exists.
|
||||||
|
self.assertTrue(storage.exists({preferred}, storage.blob_path(digest)))
|
||||||
|
|
||||||
|
# Create the image in the repository, and the tag.
|
||||||
|
first_image = Image.create(docker_image_id='i1',
|
||||||
|
repository=repository, storage=is1,
|
||||||
|
ancestors='/')
|
||||||
|
|
||||||
|
model.tag.store_tag_manifest(repository.namespace_user.username, repository.name,
|
||||||
|
'first', first_image.docker_image_id,
|
||||||
|
'sha:someshahere1', '{}')
|
||||||
|
|
||||||
|
self.assertNotDeleted(repository, 'i1')
|
||||||
|
|
||||||
|
# Delete the tag.
|
||||||
|
self.deleteTag(repository, 'first')
|
||||||
|
self.assertDeleted(repository, 'i1')
|
||||||
|
|
||||||
|
# Ensure the CAS path still exists, as it is referenced by the Blob table.
|
||||||
|
self.assertTrue(storage.exists({preferred}, storage.blob_path(digest)))
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
Reference in a new issue