diff --git a/data/model/legacy.py b/data/model/legacy.py index e0e50cb67..403a7c4a5 100644 --- a/data/model/legacy.py +++ b/data/model/legacy.py @@ -1381,6 +1381,8 @@ def list_repository_tags(namespace_name, repository_name): def garbage_collect_repository(namespace_name, repository_name): + storage_id_whitelist = {} + with config.app_config['DB_TRANSACTION_FACTORY'](db): # Get a list of all images used by tags in the repository tag_query = (RepositoryTag @@ -1407,17 +1409,16 @@ def garbage_collect_repository(namespace_name, repository_name): if len(to_remove) > 0: logger.info('Cleaning up unreferenced images: %s', to_remove) storage_id_whitelist = {all_images[to_remove_id].storage.id for to_remove_id in to_remove} - Image.delete().where(Image.id << list(to_remove)).execute() - garbage_collect_storage(storage_id_whitelist) + if len(to_remove) > 0: + logger.info('Garbage collecting storage for images: %s', to_remove) + garbage_collect_storage(storage_id_whitelist) return len(to_remove) def garbage_collect_storage(storage_id_whitelist): - # We are going to make the conscious decision to not delete image storage inside the transaction - # This may end up producing garbage in s3, trading off for higher availability in the database def placements_query_to_paths_set(placements_query): return {(placement.location.name, config.store.image_path(placement.storage.uuid)) for placement in placements_query} @@ -1433,7 +1434,11 @@ def garbage_collect_storage(storage_id_whitelist): .group_by(ImageStorage) .having((fn.Count(Image.id) == 0) & (fn.Count(DerivedImageStorage.id) == 0))) - logger.debug('Garbage collecting storage from candidates: %s', storage_id_whitelist) + # Note: We remove the derived image storage in its own transaction as a way to reduce the + # time that the transaction holds on the database indicies. This could result in a derived + # image storage being deleted for an image storage which is later reused during this time, + # but since these are caches anyway, it isn't terrible and worth the tradeoff (for now). + logger.debug('Garbage collecting derived storage from candidates: %s', storage_id_whitelist) with config.app_config['DB_TRANSACTION_FACTORY'](db): # Find out which derived storages will be removed, and add them to the whitelist orphaned_from_candidates = list(orphaned_storage_query(ImageStorage.select(ImageStorage.id), @@ -1453,6 +1458,12 @@ def garbage_collect_storage(storage_id_whitelist): .where(DerivedImageStorage.source << orphaned_from_candidates) .execute()) + # Note: Both of these deletes must occur in the same transaction (unfortunately) because a + # storage without any placement is invalid, and a placement cannot exist without a storage. + # TODO: We might want to allow for null storages on placements, which would allow us to delete + # the storages, then delete the placements in a non-transaction. + logger.debug('Garbage collecting storages from candidates: %s', storage_id_whitelist) + with config.app_config['DB_TRANSACTION_FACTORY'](db): # Track all of the data that should be removed from blob storage placements_to_remove = orphaned_storage_query(ImageStoragePlacement .select(ImageStoragePlacement, @@ -1481,7 +1492,9 @@ def garbage_collect_storage(storage_id_whitelist): .where(ImageStorage.id << orphaned_storages) .execute()) - # Delete the actual blob storage + # We are going to make the conscious decision to not delete image storage blobs inside + # transactions. + # This may end up producing garbage in s3, trading off for higher availability in the database. for location_name, image_path in paths_to_remove: logger.debug('Removing %s from %s', image_path, location_name) config.store.remove({location_name}, image_path) diff --git a/test/data/registry/us/sharedimages/1d42f7d2-614d-a212-286f-72376617d2d9/diffs.json b/test/data/registry/us/sharedimages/1d42f7d2-614d-a212-286f-72376617d2d9/diffs.json deleted file mode 100644 index c42e20f73..000000000 --- a/test/data/registry/us/sharedimages/1d42f7d2-614d-a212-286f-72376617d2d9/diffs.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "removed": [], - "added": [], - "changed": [] -} \ No newline at end of file diff --git a/test/data/registry/us/sharedimages/6a787f8c-3cc9-b656-b7b0-988c54878741/diffs.json b/test/data/registry/us/sharedimages/6a787f8c-3cc9-b656-b7b0-988c54878741/diffs.json deleted file mode 100644 index b1df890a5..000000000 --- a/test/data/registry/us/sharedimages/6a787f8c-3cc9-b656-b7b0-988c54878741/diffs.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "removed": [], - "added": [ - "/elasticsearch-0.90.5.tar.gz" - ], - "changed": [] -} \ No newline at end of file diff --git a/test/data/registry/us/sharedimages/7d9e2b70-712b-6c47-c2f9-7a5af0987bc4/diffs.json b/test/data/registry/us/sharedimages/7d9e2b70-712b-6c47-c2f9-7a5af0987bc4/diffs.json deleted file mode 100644 index d77baf59a..000000000 --- a/test/data/registry/us/sharedimages/7d9e2b70-712b-6c47-c2f9-7a5af0987bc4/diffs.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "removed": [], - "added": [ - "/root/.bash_history", - "/usr/sbin/policy-rc.d" - ], - "changed": [] -} \ No newline at end of file diff --git a/test/data/registry/us/sharedimages/934caf90-6a23-4a62-9d0a-5004c3dcb2e7/diffs.json b/test/data/registry/us/sharedimages/934caf90-6a23-4a62-9d0a-5004c3dcb2e7/diffs.json deleted file mode 100644 index 23b050546..000000000 --- a/test/data/registry/us/sharedimages/934caf90-6a23-4a62-9d0a-5004c3dcb2e7/diffs.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "removed": [], - "added": [ - "/opt/elasticsearch-0.90.5/LICENSE.txt", - "/opt/elasticsearch-0.90.5/NOTICE.txt", - "/opt/elasticsearch-0.90.5/README.textile", - "/opt/elasticsearch-0.90.5/bin/elasticsearch", - "/opt/elasticsearch-0.90.5/bin/elasticsearch.in.sh", - "/opt/elasticsearch-0.90.5/bin/plugin", - "/opt/elasticsearch-0.90.5/config/elasticsearch.yml", - "/opt/elasticsearch-0.90.5/config/logging.yml", - "/opt/elasticsearch-0.90.5/lib/elasticsearch-0.90.5.jar", - "/opt/elasticsearch-0.90.5/lib/jna-3.3.0.jar", - "/opt/elasticsearch-0.90.5/lib/jts-1.12.jar", - "/opt/elasticsearch-0.90.5/lib/log4j-1.2.17.jar", - "/opt/elasticsearch-0.90.5/lib/lucene-analyzers-common-4.4.0.jar", - "/opt/elasticsearch-0.90.5/lib/lucene-codecs-4.4.0.jar", - "/opt/elasticsearch-0.90.5/lib/lucene-core-4.4.0.jar", - "/opt/elasticsearch-0.90.5/lib/lucene-grouping-4.4.0.jar", - "/opt/elasticsearch-0.90.5/lib/lucene-highlighter-4.4.0.jar", - "/opt/elasticsearch-0.90.5/lib/lucene-join-4.4.0.jar", - "/opt/elasticsearch-0.90.5/lib/lucene-memory-4.4.0.jar", - "/opt/elasticsearch-0.90.5/lib/lucene-misc-4.4.0.jar", - "/opt/elasticsearch-0.90.5/lib/lucene-queries-4.4.0.jar", - "/opt/elasticsearch-0.90.5/lib/lucene-queryparser-4.4.0.jar", - "/opt/elasticsearch-0.90.5/lib/lucene-sandbox-4.4.0.jar", - "/opt/elasticsearch-0.90.5/lib/lucene-spatial-4.4.0.jar", - "/opt/elasticsearch-0.90.5/lib/lucene-suggest-4.4.0.jar", - "/opt/elasticsearch-0.90.5/lib/sigar/libsigar-amd64-freebsd-6.so", - "/opt/elasticsearch-0.90.5/lib/sigar/libsigar-amd64-linux.so", - "/opt/elasticsearch-0.90.5/lib/sigar/libsigar-amd64-solaris.so", - "/opt/elasticsearch-0.90.5/lib/sigar/libsigar-ia64-linux.so", - "/opt/elasticsearch-0.90.5/lib/sigar/libsigar-sparc-solaris.so", - "/opt/elasticsearch-0.90.5/lib/sigar/libsigar-sparc64-solaris.so", - "/opt/elasticsearch-0.90.5/lib/sigar/libsigar-universal-macosx.dylib", - "/opt/elasticsearch-0.90.5/lib/sigar/libsigar-universal64-macosx.dylib", - "/opt/elasticsearch-0.90.5/lib/sigar/libsigar-x86-freebsd-5.so", - "/opt/elasticsearch-0.90.5/lib/sigar/libsigar-x86-freebsd-6.so", - "/opt/elasticsearch-0.90.5/lib/sigar/libsigar-x86-linux.so", - "/opt/elasticsearch-0.90.5/lib/sigar/libsigar-x86-solaris.so", - "/opt/elasticsearch-0.90.5/lib/sigar/sigar-1.6.4.jar", - "/opt/elasticsearch-0.90.5/lib/spatial4j-0.3.jar" - ], - "changed": [] -} \ No newline at end of file