Add warning when CAS paths are skipped and ensure we are under a transaction

This commit is contained in:
Joseph Schorr 2017-03-08 17:01:07 -05:00
parent 69e550d125
commit 62312e6461
2 changed files with 38 additions and 21 deletions

View file

@ -8,7 +8,8 @@ from data.model import (config, db_transaction, InvalidImageException, TorrentIn
DataModelException, _basequery)
from data.database import (ImageStorage, Image, ImageStoragePlacement, ImageStorageLocation,
ImageStorageTransformation, ImageStorageSignature,
ImageStorageSignatureKind, Repository, Namespace, TorrentInfo)
ImageStorageSignatureKind, Repository, Namespace, TorrentInfo,
ensure_under_transaction)
logger = logging.getLogger(__name__)
@ -83,29 +84,34 @@ def garbage_collect_storage(storage_id_whitelist):
""" Returns the list of paths to remove from storage, filtered from the given placements
query by removing any CAS paths that are still referenced by storage(s) in the database.
"""
if not placements_list:
return set()
with ensure_under_transaction():
if not placements_list:
return set()
# Find the content checksums not referenced by other storages. Any that are, we cannot
# remove.
content_checksums = set([placement.storage.content_checksum for placement in placements_list
if placement.storage.cas_path])
# Find the content checksums not referenced by other storages. Any that are, we cannot
# remove.
content_checksums = set([placement.storage.content_checksum for placement in placements_list
if placement.storage.cas_path])
unreferenced_checksums = set()
if content_checksums:
query = (ImageStorage
.select(ImageStorage.content_checksum)
.where(ImageStorage.content_checksum << list(content_checksums)))
referenced_checksums = set([image_storage.content_checksum for image_storage in query])
unreferenced_checksums = content_checksums - referenced_checksums
unreferenced_checksums = set()
if content_checksums:
query = (ImageStorage
.select(ImageStorage.content_checksum)
.where(ImageStorage.content_checksum << list(content_checksums)))
referenced_checksums = set([image_storage.content_checksum for image_storage in query])
if referenced_checksums:
logger.warning('GC attempted to remove CAS checksums %s, which are still referenced',
referenced_checksums)
# Return all placements for all image storages found not at a CAS path or with a content
# checksum that is referenced.
return {(get_image_location_for_id(placement.location_id).name,
get_layer_path(placement.storage))
for placement in placements_list
if not placement.storage.cas_path or
placement.storage.content_checksum in unreferenced_checksums}
unreferenced_checksums = content_checksums - referenced_checksums
# Return all placements for all image storages found not at a CAS path or with a content
# checksum that is referenced.
return {(get_image_location_for_id(placement.location_id).name,
get_layer_path(placement.storage))
for placement in placements_list
if not placement.storage.cas_path or
placement.storage.content_checksum in unreferenced_checksums}
# Note: Both of these deletes must occur in the same transaction (unfortunately) because a
# storage without any placement is invalid, and a placement cannot exist without a storage.