Add warning when CAS paths are skipped and ensure we are under a transaction

This commit is contained in:
Joseph Schorr 2017-03-08 17:01:07 -05:00
parent 69e550d125
commit 62312e6461
2 changed files with 38 additions and 21 deletions

View file

@ -7,6 +7,7 @@ import sys
import time import time
import uuid import uuid
from contextlib import contextmanager
from collections import defaultdict from collections import defaultdict
from datetime import datetime from datetime import datetime
from random import SystemRandom from random import SystemRandom
@ -230,6 +231,7 @@ db_match_func = CallableProxy()
db_for_update = CallableProxy() db_for_update = CallableProxy()
db_transaction = CallableProxy() db_transaction = CallableProxy()
db_concat_func = CallableProxy() db_concat_func = CallableProxy()
ensure_under_transaction = CallableProxy()
def validate_database_url(url, db_kwargs, connect_timeout=5): def validate_database_url(url, db_kwargs, connect_timeout=5):
@ -286,7 +288,16 @@ def configure(config_object):
def _db_transaction(): def _db_transaction():
return config_object['DB_TRANSACTION_FACTORY'](db) return config_object['DB_TRANSACTION_FACTORY'](db)
@contextmanager
def _ensure_under_transaction():
if not config_object['TESTING']:
if db.transaction_depth() == 0:
raise Exception('Expected to be under a transaction')
yield
db_transaction.initialize(_db_transaction) db_transaction.initialize(_db_transaction)
ensure_under_transaction.initialize(_ensure_under_transaction)
def random_string_generator(length=16): def random_string_generator(length=16):
def random_string(): def random_string():

View file

@ -8,7 +8,8 @@ from data.model import (config, db_transaction, InvalidImageException, TorrentIn
DataModelException, _basequery) DataModelException, _basequery)
from data.database import (ImageStorage, Image, ImageStoragePlacement, ImageStorageLocation, from data.database import (ImageStorage, Image, ImageStoragePlacement, ImageStorageLocation,
ImageStorageTransformation, ImageStorageSignature, ImageStorageTransformation, ImageStorageSignature,
ImageStorageSignatureKind, Repository, Namespace, TorrentInfo) ImageStorageSignatureKind, Repository, Namespace, TorrentInfo,
ensure_under_transaction)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -83,29 +84,34 @@ def garbage_collect_storage(storage_id_whitelist):
""" Returns the list of paths to remove from storage, filtered from the given placements """ Returns the list of paths to remove from storage, filtered from the given placements
query by removing any CAS paths that are still referenced by storage(s) in the database. query by removing any CAS paths that are still referenced by storage(s) in the database.
""" """
if not placements_list: with ensure_under_transaction():
return set() if not placements_list:
return set()
# Find the content checksums not referenced by other storages. Any that are, we cannot # Find the content checksums not referenced by other storages. Any that are, we cannot
# remove. # remove.
content_checksums = set([placement.storage.content_checksum for placement in placements_list content_checksums = set([placement.storage.content_checksum for placement in placements_list
if placement.storage.cas_path]) if placement.storage.cas_path])
unreferenced_checksums = set() unreferenced_checksums = set()
if content_checksums: if content_checksums:
query = (ImageStorage query = (ImageStorage
.select(ImageStorage.content_checksum) .select(ImageStorage.content_checksum)
.where(ImageStorage.content_checksum << list(content_checksums))) .where(ImageStorage.content_checksum << list(content_checksums)))
referenced_checksums = set([image_storage.content_checksum for image_storage in query]) referenced_checksums = set([image_storage.content_checksum for image_storage in query])
unreferenced_checksums = content_checksums - referenced_checksums if referenced_checksums:
logger.warning('GC attempted to remove CAS checksums %s, which are still referenced',
referenced_checksums)
# Return all placements for all image storages found not at a CAS path or with a content unreferenced_checksums = content_checksums - referenced_checksums
# checksum that is referenced.
return {(get_image_location_for_id(placement.location_id).name, # Return all placements for all image storages found not at a CAS path or with a content
get_layer_path(placement.storage)) # checksum that is referenced.
for placement in placements_list return {(get_image_location_for_id(placement.location_id).name,
if not placement.storage.cas_path or get_layer_path(placement.storage))
placement.storage.content_checksum in unreferenced_checksums} for placement in placements_list
if not placement.storage.cas_path or
placement.storage.content_checksum in unreferenced_checksums}
# Note: Both of these deletes must occur in the same transaction (unfortunately) because a # Note: Both of these deletes must occur in the same transaction (unfortunately) because a
# storage without any placement is invalid, and a placement cannot exist without a storage. # storage without any placement is invalid, and a placement cannot exist without a storage.