Add warning when CAS paths are skipped and ensure we are under a transaction

This commit is contained in:
Joseph Schorr 2017-03-08 17:01:07 -05:00
parent 69e550d125
commit 62312e6461
2 changed files with 38 additions and 21 deletions

View file

@ -7,6 +7,7 @@ import sys
import time
import uuid
from contextlib import contextmanager
from collections import defaultdict
from datetime import datetime
from random import SystemRandom
@ -230,6 +231,7 @@ db_match_func = CallableProxy()
# Module-level CallableProxy placeholders for database helpers.
# NOTE(review): presumably each one is bound to a concrete callable via
# `.initialize(...)` once the database is configured -- confirm for all proxies.
db_for_update = CallableProxy()
db_transaction = CallableProxy()
db_concat_func = CallableProxy()
ensure_under_transaction = CallableProxy()
def validate_database_url(url, db_kwargs, connect_timeout=5):
@ -286,7 +288,16 @@ def configure(config_object):
def _db_transaction():
    """Build a transaction object for `db` using the configured factory.

    Looks up the callable stored under ``DB_TRANSACTION_FACTORY`` in
    ``config_object`` and invokes it with the database handle.
    """
    transaction_factory = config_object['DB_TRANSACTION_FACTORY']
    return transaction_factory(db)
@contextmanager
def _ensure_under_transaction():
    """Context manager asserting that a database transaction is already open.

    On entry, unless ``config_object['TESTING']`` is truthy, the current
    transaction depth of `db` is checked and an ``Exception`` is raised when
    it is zero. The wrapped body then runs unchanged.

    Raises:
        Exception: if not testing and ``db.transaction_depth()`` is 0.
    """
    # Short-circuit keeps the depth lookup from running at all under TESTING.
    if not config_object['TESTING'] and db.transaction_depth() == 0:
        raise Exception('Expected to be under a transaction')

    yield
# Bind the helpers defined above to their module-level CallableProxy objects so
# callers can use `db_transaction` / `ensure_under_transaction` directly.
db_transaction.initialize(_db_transaction)
ensure_under_transaction.initialize(_ensure_under_transaction)
def random_string_generator(length=16):
def random_string():

View file

@ -8,7 +8,8 @@ from data.model import (config, db_transaction, InvalidImageException, TorrentIn
DataModelException, _basequery)
from data.database import (ImageStorage, Image, ImageStoragePlacement, ImageStorageLocation,
ImageStorageTransformation, ImageStorageSignature,
ImageStorageSignatureKind, Repository, Namespace, TorrentInfo)
ImageStorageSignatureKind, Repository, Namespace, TorrentInfo,
ensure_under_transaction)
logger = logging.getLogger(__name__)
@ -83,6 +84,7 @@ def garbage_collect_storage(storage_id_whitelist):
""" Returns the list of paths to remove from storage, filtered from the given placements
query by removing any CAS paths that are still referenced by storage(s) in the database.
"""
with ensure_under_transaction():
if not placements_list:
return set()
@ -97,6 +99,10 @@ def garbage_collect_storage(storage_id_whitelist):
.select(ImageStorage.content_checksum)
.where(ImageStorage.content_checksum << list(content_checksums)))
referenced_checksums = set([image_storage.content_checksum for image_storage in query])
if referenced_checksums:
logger.warning('GC attempted to remove CAS checksums %s, which are still referenced',
referenced_checksums)
unreferenced_checksums = content_checksums - referenced_checksums
# Return all placements for all image storages found not at a CAS path or with a content