Add warning when CAS paths are skipped and ensure we are under a transaction

This commit is contained in:
Joseph Schorr 2017-03-08 17:01:07 -05:00
parent 69e550d125
commit 62312e6461
2 changed files with 38 additions and 21 deletions

View file

@ -7,6 +7,7 @@ import sys
import time
import uuid
from contextlib import contextmanager
from collections import defaultdict
from datetime import datetime
from random import SystemRandom
@ -230,6 +231,7 @@ db_match_func = CallableProxy()
db_for_update = CallableProxy()
db_transaction = CallableProxy()
db_concat_func = CallableProxy()
ensure_under_transaction = CallableProxy()
def validate_database_url(url, db_kwargs, connect_timeout=5):
@ -286,7 +288,16 @@ def configure(config_object):
def _db_transaction():
return config_object['DB_TRANSACTION_FACTORY'](db)
@contextmanager
def _ensure_under_transaction():
if not config_object['TESTING']:
if db.transaction_depth() == 0:
raise Exception('Expected to be under a transaction')
yield
db_transaction.initialize(_db_transaction)
ensure_under_transaction.initialize(_ensure_under_transaction)
def random_string_generator(length=16):
def random_string():

View file

@ -8,7 +8,8 @@ from data.model import (config, db_transaction, InvalidImageException, TorrentIn
DataModelException, _basequery)
from data.database import (ImageStorage, Image, ImageStoragePlacement, ImageStorageLocation,
ImageStorageTransformation, ImageStorageSignature,
ImageStorageSignatureKind, Repository, Namespace, TorrentInfo)
ImageStorageSignatureKind, Repository, Namespace, TorrentInfo,
ensure_under_transaction)
logger = logging.getLogger(__name__)
@ -83,29 +84,34 @@ def garbage_collect_storage(storage_id_whitelist):
""" Returns the list of paths to remove from storage, filtered from the given placements
query by removing any CAS paths that are still referenced by storage(s) in the database.
"""
if not placements_list:
return set()
with ensure_under_transaction():
if not placements_list:
return set()
# Find the content checksums not referenced by other storages. Any that are, we cannot
# remove.
content_checksums = set([placement.storage.content_checksum for placement in placements_list
if placement.storage.cas_path])
# Find the content checksums not referenced by other storages. Any that are, we cannot
# remove.
content_checksums = set([placement.storage.content_checksum for placement in placements_list
if placement.storage.cas_path])
unreferenced_checksums = set()
if content_checksums:
query = (ImageStorage
.select(ImageStorage.content_checksum)
.where(ImageStorage.content_checksum << list(content_checksums)))
referenced_checksums = set([image_storage.content_checksum for image_storage in query])
unreferenced_checksums = content_checksums - referenced_checksums
unreferenced_checksums = set()
if content_checksums:
query = (ImageStorage
.select(ImageStorage.content_checksum)
.where(ImageStorage.content_checksum << list(content_checksums)))
referenced_checksums = set([image_storage.content_checksum for image_storage in query])
if referenced_checksums:
logger.warning('GC attempted to remove CAS checksums %s, which are still referenced',
referenced_checksums)
# Return all placements for all image storages found not at a CAS path or with a content
# checksum that is referenced.
return {(get_image_location_for_id(placement.location_id).name,
get_layer_path(placement.storage))
for placement in placements_list
if not placement.storage.cas_path or
placement.storage.content_checksum in unreferenced_checksums}
unreferenced_checksums = content_checksums - referenced_checksums
# Return all placements for all image storages found not at a CAS path or with a content
# checksum that is referenced.
return {(get_image_location_for_id(placement.location_id).name,
get_layer_path(placement.storage))
for placement in placements_list
if not placement.storage.cas_path or
placement.storage.content_checksum in unreferenced_checksums}
# Note: Both of these deletes must occur in the same transaction (unfortunately) because a
# storage without any placement is invalid, and a placement cannot exist without a storage.