From 62312e64619e80619a3bd2d8c84dd8a00b4ab975 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Wed, 8 Mar 2017 17:01:07 -0500 Subject: [PATCH] Add warning when CAS paths are skipped and ensure we are under a transaction --- data/database.py | 11 ++++++++++ data/model/storage.py | 48 ++++++++++++++++++++++++------------------- 2 files changed, 38 insertions(+), 21 deletions(-) diff --git a/data/database.py b/data/database.py index f07ad6592..d51f052bf 100644 --- a/data/database.py +++ b/data/database.py @@ -7,6 +7,7 @@ import sys import time import uuid +from contextlib import contextmanager from collections import defaultdict from datetime import datetime from random import SystemRandom @@ -230,6 +231,7 @@ db_match_func = CallableProxy() db_for_update = CallableProxy() db_transaction = CallableProxy() db_concat_func = CallableProxy() +ensure_under_transaction = CallableProxy() def validate_database_url(url, db_kwargs, connect_timeout=5): @@ -286,7 +288,16 @@ def configure(config_object): def _db_transaction(): return config_object['DB_TRANSACTION_FACTORY'](db) + @contextmanager + def _ensure_under_transaction(): + if not config_object['TESTING']: + if db.transaction_depth() == 0: + raise Exception('Expected to be under a transaction') + + yield + db_transaction.initialize(_db_transaction) + ensure_under_transaction.initialize(_ensure_under_transaction) def random_string_generator(length=16): def random_string(): diff --git a/data/model/storage.py b/data/model/storage.py index b0d6e2487..79ab529bb 100644 --- a/data/model/storage.py +++ b/data/model/storage.py @@ -8,7 +8,8 @@ from data.model import (config, db_transaction, InvalidImageException, TorrentIn DataModelException, _basequery) from data.database import (ImageStorage, Image, ImageStoragePlacement, ImageStorageLocation, ImageStorageTransformation, ImageStorageSignature, - ImageStorageSignatureKind, Repository, Namespace, TorrentInfo) + ImageStorageSignatureKind, Repository, Namespace, TorrentInfo, + ensure_under_transaction) logger = logging.getLogger(__name__) @@ -83,29 +84,34 @@ def garbage_collect_storage(storage_id_whitelist): """ Returns the list of paths to remove from storage, filtered from the given placements query by removing any CAS paths that are still referenced by storage(s) in the database. """ - if not placements_list: - return set() + with ensure_under_transaction(): + if not placements_list: + return set() - # Find the content checksums not referenced by other storages. Any that are, we cannot - # remove. - content_checksums = set([placement.storage.content_checksum for placement in placements_list - if placement.storage.cas_path]) + # Find the content checksums not referenced by other storages. Any that are, we cannot + # remove. + content_checksums = set([placement.storage.content_checksum for placement in placements_list + if placement.storage.cas_path]) - unreferenced_checksums = set() - if content_checksums: - query = (ImageStorage - .select(ImageStorage.content_checksum) - .where(ImageStorage.content_checksum << list(content_checksums))) - referenced_checksums = set([image_storage.content_checksum for image_storage in query]) - unreferenced_checksums = content_checksums - referenced_checksums + unreferenced_checksums = set() + if content_checksums: + query = (ImageStorage + .select(ImageStorage.content_checksum) + .where(ImageStorage.content_checksum << list(content_checksums))) + referenced_checksums = set([image_storage.content_checksum for image_storage in query]) + if referenced_checksums: + logger.warning('GC attempted to remove CAS checksums %s, which are still referenced', + referenced_checksums) - # Return all placements for all image storages found not at a CAS path or with a content - # checksum that is referenced. - return {(get_image_location_for_id(placement.location_id).name, - get_layer_path(placement.storage)) - for placement in placements_list - if not placement.storage.cas_path or - placement.storage.content_checksum in unreferenced_checksums} + unreferenced_checksums = content_checksums - referenced_checksums + + # Return all placements for all image storages found not at a CAS path or with a content + # checksum that is referenced. + return {(get_image_location_for_id(placement.location_id).name, + get_layer_path(placement.storage)) + for placement in placements_list + if not placement.storage.cas_path or + placement.storage.content_checksum in unreferenced_checksums} # Note: Both of these deletes must occur in the same transaction (unfortunately) because a # storage without any placement is invalid, and a placement cannot exist without a storage.