Split out the GC code into smaller transactions. Remove from the actual storage component after the fact, outside of the transactions.
This commit is contained in:
parent
ddab067ebf
commit
380eb49e58
1 changed files with 42 additions and 34 deletions
|
@ -1391,46 +1391,54 @@ def garbage_collect_repository(namespace_name, repository_name):
|
|||
|
||||
logger.info('Cleaning up unreferenced images: %s', to_remove)
|
||||
|
||||
uuids_to_check_for_gc = set()
|
||||
for image_id_to_remove in to_remove:
|
||||
image_to_remove = all_images[image_id_to_remove]
|
||||
uuids_to_check_for_gc = {all_images[id_to_remove].storage.uuid for id_to_remove in to_remove}
|
||||
Image.delete().where(Image.id << list(to_remove)).execute()
|
||||
|
||||
logger.debug('Adding image storage to the gc list: %s',
|
||||
image_to_remove.storage.uuid)
|
||||
uuids_to_check_for_gc.add(image_to_remove.storage.uuid)
|
||||
logger.debug('Checking image storages for being orphaned: %s', uuids_to_check_for_gc)
|
||||
|
||||
image_to_remove.delete_instance()
|
||||
# We are going to make the concious decision to not delete image storage inside the transaction
|
||||
# This may end up producing garbage in s3, trading off for higher availability in the database
|
||||
def placements_query_to_paths_set(placements_query):
|
||||
return {(placement.location.name, config.store.image_path(placement.storage.uuid))
|
||||
for placement in placements_query}
|
||||
|
||||
def remove_storages(query):
|
||||
for storage in query:
|
||||
logger.debug('Garbage collecting image storage: %s', storage.uuid)
|
||||
def remove_storages(placements_query):
|
||||
storages_to_remove = {placement.storage.uuid: placement.storage
|
||||
for placement in placements_query}
|
||||
for storage in storages_to_remove.values():
|
||||
# Deletes all placements as well, but leaves derived storages
|
||||
storage.delete_instance(recursive=True)
|
||||
|
||||
image_path = config.store.image_path(storage.uuid)
|
||||
for placement in storage.imagestorageplacement_set:
|
||||
location_name = placement.location.name
|
||||
placement.delete_instance()
|
||||
config.store.remove({location_name}, image_path)
|
||||
paths_to_remove = set()
|
||||
with config.app_config['DB_TRANSACTION_FACTORY'](db):
|
||||
placements_to_remove = (ImageStoragePlacement
|
||||
.select(ImageStoragePlacement, ImageStorage, ImageStorageLocation)
|
||||
.join(ImageStorage)
|
||||
.join(Image, JOIN_LEFT_OUTER)
|
||||
.switch(ImageStoragePlacement)
|
||||
.join(ImageStorageLocation)
|
||||
.group_by(ImageStorage)
|
||||
.where(ImageStorage.uuid << list(uuids_to_check_for_gc))
|
||||
.having(fn.Count(Image.id) == 0))
|
||||
paths_to_remove.update(placements_query_to_paths_set(placements_to_remove))
|
||||
remove_storages(placements_to_remove)
|
||||
|
||||
storage.delete_instance(recursive=True)
|
||||
with config.app_config['DB_TRANSACTION_FACTORY'](db):
|
||||
derived_to_remove = (ImageStoragePlacement
|
||||
.select(ImageStoragePlacement, ImageStorage, ImageStorageLocation)
|
||||
.join(ImageStorage)
|
||||
.join(DerivedImageStorage, on=(ImageStorage.id ==
|
||||
DerivedImageStorage.derivative))
|
||||
.switch(ImageStoragePlacement)
|
||||
.join(ImageStorageLocation)
|
||||
.where(DerivedImageStorage.source >> None))
|
||||
paths_to_remove.update(placements_query_to_paths_set(derived_to_remove))
|
||||
remove_storages(derived_to_remove)
|
||||
|
||||
if uuids_to_check_for_gc:
|
||||
storage_to_remove = (ImageStorage
|
||||
.select()
|
||||
.join(Image, JOIN_LEFT_OUTER)
|
||||
.group_by(ImageStorage)
|
||||
.where(ImageStorage.uuid << list(uuids_to_check_for_gc))
|
||||
.having(fn.Count(Image.id) == 0))
|
||||
|
||||
remove_storages(storage_to_remove)
|
||||
|
||||
# Now remove any derived image storages whose sources have been removed
|
||||
derived_storages_to_remove = (ImageStorage
|
||||
.select()
|
||||
.join(DerivedImageStorage, on=(ImageStorage.id == DerivedImageStorage.derivative))
|
||||
.where(DerivedImageStorage.source >> None))
|
||||
remove_storages(derived_storages_to_remove)
|
||||
|
||||
return len(to_remove)
|
||||
# Delete the actual blob storage
|
||||
for location_name, image_path in paths_to_remove:
|
||||
logger.debug('Removing %s from %s', image_path, location_name)
|
||||
config.store.remove({location_name}, image_path)
|
||||
|
||||
|
||||
def get_tag_image(namespace_name, repository_name, tag_name):
|
||||
|
|
Reference in a new issue