Add warning when CAS paths are skipped and ensure we are under a transaction
This commit is contained in:
parent
69e550d125
commit
62312e6461
2 changed files with 38 additions and 21 deletions
|
@ -7,6 +7,7 @@ import sys
|
||||||
import time
|
import time
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
|
from contextlib import contextmanager
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from random import SystemRandom
|
from random import SystemRandom
|
||||||
|
@ -230,6 +231,7 @@ db_match_func = CallableProxy()
|
||||||
db_for_update = CallableProxy()
|
db_for_update = CallableProxy()
|
||||||
db_transaction = CallableProxy()
|
db_transaction = CallableProxy()
|
||||||
db_concat_func = CallableProxy()
|
db_concat_func = CallableProxy()
|
||||||
|
ensure_under_transaction = CallableProxy()
|
||||||
|
|
||||||
|
|
||||||
def validate_database_url(url, db_kwargs, connect_timeout=5):
|
def validate_database_url(url, db_kwargs, connect_timeout=5):
|
||||||
|
@ -286,7 +288,16 @@ def configure(config_object):
|
||||||
def _db_transaction():
|
def _db_transaction():
|
||||||
return config_object['DB_TRANSACTION_FACTORY'](db)
|
return config_object['DB_TRANSACTION_FACTORY'](db)
|
||||||
|
|
||||||
|
@contextmanager
|
||||||
|
def _ensure_under_transaction():
|
||||||
|
if not config_object['TESTING']:
|
||||||
|
if db.transaction_depth() == 0:
|
||||||
|
raise Exception('Expected to be under a transaction')
|
||||||
|
|
||||||
|
yield
|
||||||
|
|
||||||
db_transaction.initialize(_db_transaction)
|
db_transaction.initialize(_db_transaction)
|
||||||
|
ensure_under_transaction.initialize(_ensure_under_transaction)
|
||||||
|
|
||||||
def random_string_generator(length=16):
|
def random_string_generator(length=16):
|
||||||
def random_string():
|
def random_string():
|
||||||
|
|
|
@ -8,7 +8,8 @@ from data.model import (config, db_transaction, InvalidImageException, TorrentIn
|
||||||
DataModelException, _basequery)
|
DataModelException, _basequery)
|
||||||
from data.database import (ImageStorage, Image, ImageStoragePlacement, ImageStorageLocation,
|
from data.database import (ImageStorage, Image, ImageStoragePlacement, ImageStorageLocation,
|
||||||
ImageStorageTransformation, ImageStorageSignature,
|
ImageStorageTransformation, ImageStorageSignature,
|
||||||
ImageStorageSignatureKind, Repository, Namespace, TorrentInfo)
|
ImageStorageSignatureKind, Repository, Namespace, TorrentInfo,
|
||||||
|
ensure_under_transaction)
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
@ -83,29 +84,34 @@ def garbage_collect_storage(storage_id_whitelist):
|
||||||
""" Returns the list of paths to remove from storage, filtered from the given placements
|
""" Returns the list of paths to remove from storage, filtered from the given placements
|
||||||
query by removing any CAS paths that are still referenced by storage(s) in the database.
|
query by removing any CAS paths that are still referenced by storage(s) in the database.
|
||||||
"""
|
"""
|
||||||
if not placements_list:
|
with ensure_under_transaction():
|
||||||
return set()
|
if not placements_list:
|
||||||
|
return set()
|
||||||
|
|
||||||
# Find the content checksums not referenced by other storages. Any that are, we cannot
|
# Find the content checksums not referenced by other storages. Any that are, we cannot
|
||||||
# remove.
|
# remove.
|
||||||
content_checksums = set([placement.storage.content_checksum for placement in placements_list
|
content_checksums = set([placement.storage.content_checksum for placement in placements_list
|
||||||
if placement.storage.cas_path])
|
if placement.storage.cas_path])
|
||||||
|
|
||||||
unreferenced_checksums = set()
|
unreferenced_checksums = set()
|
||||||
if content_checksums:
|
if content_checksums:
|
||||||
query = (ImageStorage
|
query = (ImageStorage
|
||||||
.select(ImageStorage.content_checksum)
|
.select(ImageStorage.content_checksum)
|
||||||
.where(ImageStorage.content_checksum << list(content_checksums)))
|
.where(ImageStorage.content_checksum << list(content_checksums)))
|
||||||
referenced_checksums = set([image_storage.content_checksum for image_storage in query])
|
referenced_checksums = set([image_storage.content_checksum for image_storage in query])
|
||||||
unreferenced_checksums = content_checksums - referenced_checksums
|
if referenced_checksums:
|
||||||
|
logger.warning('GC attempted to remove CAS checksums %s, which are still referenced',
|
||||||
|
referenced_checksums)
|
||||||
|
|
||||||
# Return all placements for all image storages found not at a CAS path or with a content
|
unreferenced_checksums = content_checksums - referenced_checksums
|
||||||
# checksum that is referenced.
|
|
||||||
return {(get_image_location_for_id(placement.location_id).name,
|
# Return all placements for all image storages found not at a CAS path or with a content
|
||||||
get_layer_path(placement.storage))
|
# checksum that is referenced.
|
||||||
for placement in placements_list
|
return {(get_image_location_for_id(placement.location_id).name,
|
||||||
if not placement.storage.cas_path or
|
get_layer_path(placement.storage))
|
||||||
placement.storage.content_checksum in unreferenced_checksums}
|
for placement in placements_list
|
||||||
|
if not placement.storage.cas_path or
|
||||||
|
placement.storage.content_checksum in unreferenced_checksums}
|
||||||
|
|
||||||
# Note: Both of these deletes must occur in the same transaction (unfortunately) because a
|
# Note: Both of these deletes must occur in the same transaction (unfortunately) because a
|
||||||
# storage without any placement is invalid, and a placement cannot exist without a storage.
|
# storage without any placement is invalid, and a placement cannot exist without a storage.
|
||||||
|
|
Reference in a new issue