"""Maintenance script: delete TagManifest rows that reference missing blobs."""

import logging

from peewee import fn

# NOTE(review): `app` is not referenced below; presumably imported for its
# initialization side effects -- confirm before removing.
from app import app
from util.migrate.allocator import yield_random_entries
from endpoints.v2.manifest import SignedManifest
from data.database import TagManifest
from data import model


logger = logging.getLogger(__name__)

# Emit a progress line after every PRINT_EVERY audited manifests.
PRINT_EVERY = 10
# Batch size handed to yield_random_entries.
BATCH_SIZE = 100


def batch_query():
    """Return the base query (an unfiltered select over TagManifest)."""
    return TagManifest.select()


def remove_stale_manifests():
    """Audit the manifests yielded by yield_random_entries and delete stale ones.

    Each yielded row has its ``json_data`` parsed as a ``SignedManifest``
    (with ``validate=False``), and every layer digest of the parsed manifest
    is looked up through ``model.blob.get_repo_blob_by_digest``. If a lookup
    raises ``model.BlobDoesNotExist`` the row is deleted and the remaining
    layers of that manifest are not checked. Running totals are logged every
    ``PRINT_EVERY`` rows, and a final summary is logged once iteration ends.
    """
    removed_count = 0
    audited_count = 0

    # Largest TagManifest id currently stored; passed as the last argument of
    # yield_random_entries (presumably its upper id bound -- see
    # util.migrate.allocator).
    highest_id = TagManifest.select(fn.Max(TagManifest.id)).scalar()

    # Only the first element of each yielded triple is used here.
    for manifest_row, _, _ in yield_random_entries(batch_query, TagManifest.id, BATCH_SIZE,
                                                   highest_id):
        audited_count += 1
        manifest = SignedManifest(manifest_row.json_data, validate=False)
        # Note: the value logged as "id" is the row's digest.
        logger.debug('Auditing manifest with id: %s for %s/%s', manifest_row.digest,
                     manifest.namespace, manifest.repo_name)

        try:
            for layer in manifest.layers:
                digest = layer.digest
                model.blob.get_repo_blob_by_digest(manifest.namespace, manifest.repo_name,
                                                   digest)
        except model.BlobDoesNotExist:
            # `digest` holds the value most recently assigned in the loop above.
            logger.warning('Manifest missing layer: %s, deleting', digest)
            manifest_row.delete_instance()
            removed_count += 1

        if audited_count % PRINT_EVERY == 0:
            logger.info('Removed %s/%s manifests', removed_count, audited_count)

    logger.info('Final Summary: %s/%s manifests removed', removed_count, audited_count)


if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    # Raise the threshold of these two loggers to WARNING so INFO-level output
    # comes only from the rest of the program.
    for noisy_logger in ('endpoints.v2.manifest', 'peewee'):
        logging.getLogger(noisy_logger).setLevel(logging.WARNING)
    remove_stale_manifests()