Change delete to once again not perform everything under a transaction. A recent revision accidentally moved everything back under it.

This commit is contained in:
Joseph Schorr 2014-10-29 11:59:02 -04:00
parent 109850b428
commit e9c2e54dbc
5 changed files with 19 additions and 71 deletions

View file

@ -1381,6 +1381,8 @@ def list_repository_tags(namespace_name, repository_name):
def garbage_collect_repository(namespace_name, repository_name):
storage_id_whitelist = {}
with config.app_config['DB_TRANSACTION_FACTORY'](db):
# Get a list of all images used by tags in the repository
tag_query = (RepositoryTag
@ -1407,17 +1409,16 @@ def garbage_collect_repository(namespace_name, repository_name):
if len(to_remove) > 0:
logger.info('Cleaning up unreferenced images: %s', to_remove)
storage_id_whitelist = {all_images[to_remove_id].storage.id for to_remove_id in to_remove}
Image.delete().where(Image.id << list(to_remove)).execute()
garbage_collect_storage(storage_id_whitelist)
if len(to_remove) > 0:
logger.info('Garbage collecting storage for images: %s', to_remove)
garbage_collect_storage(storage_id_whitelist)
return len(to_remove)
def garbage_collect_storage(storage_id_whitelist):
# We are going to make the conscious decision to not delete image storage inside the transaction
# This may end up producing garbage in s3, trading off for higher availability in the database
def placements_query_to_paths_set(placements_query):
return {(placement.location.name, config.store.image_path(placement.storage.uuid))
for placement in placements_query}
@ -1433,7 +1434,11 @@ def garbage_collect_storage(storage_id_whitelist):
.group_by(ImageStorage)
.having((fn.Count(Image.id) == 0) & (fn.Count(DerivedImageStorage.id) == 0)))
logger.debug('Garbage collecting storage from candidates: %s', storage_id_whitelist)
# Note: We remove the derived image storage in its own transaction as a way to reduce the
# time that the transaction holds on the database indicies. This could result in a derived
# image storage being deleted for an image storage which is later reused during this time,
# but since these are caches anyway, it isn't terrible and worth the tradeoff (for now).
logger.debug('Garbage collecting derived storage from candidates: %s', storage_id_whitelist)
with config.app_config['DB_TRANSACTION_FACTORY'](db):
# Find out which derived storages will be removed, and add them to the whitelist
orphaned_from_candidates = list(orphaned_storage_query(ImageStorage.select(ImageStorage.id),
@ -1453,6 +1458,12 @@ def garbage_collect_storage(storage_id_whitelist):
.where(DerivedImageStorage.source << orphaned_from_candidates)
.execute())
# Note: Both of these deletes must occur in the same transaction (unfortunately) because a
# storage without any placement is invalid, and a placement cannot exist without a storage.
# TODO: We might want to allow for null storages on placements, which would allow us to delete
# the storages, then delete the placements in a non-transaction.
logger.debug('Garbage collecting storages from candidates: %s', storage_id_whitelist)
with config.app_config['DB_TRANSACTION_FACTORY'](db):
# Track all of the data that should be removed from blob storage
placements_to_remove = orphaned_storage_query(ImageStoragePlacement
.select(ImageStoragePlacement,
@ -1481,7 +1492,9 @@ def garbage_collect_storage(storage_id_whitelist):
.where(ImageStorage.id << orphaned_storages)
.execute())
# Delete the actual blob storage
# We are going to make the conscious decision to not delete image storage blobs inside
# transactions.
# This may end up producing garbage in s3, trading off for higher availability in the database.
for location_name, image_path in paths_to_remove:
logger.debug('Removing %s from %s', image_path, location_name)
config.store.remove({location_name}, image_path)