From 6e05920d6b2923b74dff5a247364474089422cab Mon Sep 17 00:00:00 2001
From: Jake Moshenko
Date: Fri, 12 Feb 2016 17:55:33 -0500
Subject: [PATCH] Delete bad manifests from the DB

---
 endpoints/v2/manifest.py |  5 +--
 tools/auditmanifests.py  | 68 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 71 insertions(+), 2 deletions(-)
 create mode 100644 tools/auditmanifests.py

diff --git a/endpoints/v2/manifest.py b/endpoints/v2/manifest.py
index 9328e02d5..6199b45c1 100644
--- a/endpoints/v2/manifest.py
+++ b/endpoints/v2/manifest.py
@@ -63,7 +63,7 @@ _SCHEMA_VER = 'schemaVersion'
 
 
 class SignedManifest(object):
-  def __init__(self, manifest_bytes):
+  def __init__(self, manifest_bytes, validate=True):
     self._bytes = manifest_bytes
 
     self._parsed = json.loads(manifest_bytes)
@@ -79,7 +79,8 @@ class SignedManifest(object):
     else:
       raise ValueError('repo_name has too many or too few pieces')
 
-    self._validate()
+    if validate:
+      self._validate()
 
   def _validate(self):
     for signature in self._signatures:
diff --git a/tools/auditmanifests.py b/tools/auditmanifests.py
new file mode 100644
index 000000000..1ce26afc4
--- /dev/null
+++ b/tools/auditmanifests.py
@@ -0,0 +1,68 @@
+"""Audit tool that deletes TagManifest rows whose manifests reference missing layer blobs."""
+
+import logging
+
+from peewee import fn
+
+from app import app
+from util.migrate.allocator import yield_random_entries
+from endpoints.v2.manifest import SignedManifest
+from data.database import TagManifest
+from data import model
+
+
+logger = logging.getLogger(__name__)
+
+
+# Log a progress line after every PRINT_EVERY audited manifests.
+PRINT_EVERY = 10
+# Batch size handed to yield_random_entries.
+BATCH_SIZE = 100
+
+
+def batch_query():
+  """Return an unfiltered select over TagManifest, used as the query factory for the audit."""
+  return TagManifest.select()
+
+
+def remove_stale_manifests():
+  """Delete every TagManifest whose manifest lists a layer blob missing from its repository.
+
+  Each stored manifest is parsed without validation, and every layer digest it lists is
+  looked up in the manifest's namespace/repository. The first missing blob causes the
+  manifest row to be deleted. Progress is logged every PRINT_EVERY manifests and a final
+  summary is logged at the end.
+  """
+  max_manifest_id = TagManifest.select(fn.Max(TagManifest.id)).scalar()
+  problematic = 0
+  checked = 0
+  for found, _ in yield_random_entries(batch_query, TagManifest.id, BATCH_SIZE, max_manifest_id):
+    checked += 1
+    parsed = SignedManifest(found.json_data, validate=False)
+    logger.debug('Auditing manifest with id: %s for %s/%s', found.digest, parsed.namespace,
+                 parsed.repo_name)
+
+    try:
+      for layer_mdata in parsed.layers:
+        digest = layer_mdata.digest
+        # Only the existence check matters, so the lookup result is discarded. `found`
+        # must keep referring to the manifest row: the handler below deletes whatever
+        # `found` names, and that has to be the manifest, never a fetched blob.
+        model.blob.get_repo_blob_by_digest(parsed.namespace, parsed.repo_name, digest)
+
+    except model.BlobDoesNotExist:
+      logger.warning('Manifest missing layer: %s, deleting', digest)
+      found.delete_instance()
+      problematic += 1
+
+    if checked % PRINT_EVERY == 0:
+      logger.info('Removed %s/%s manifests', problematic, checked)
+
+  logger.info('Final Summary: %s/%s manifests removed', problematic, checked)
+
+
+if __name__ == '__main__':
+  logging.basicConfig(level=logging.INFO)
+  logging.getLogger('endpoints.v2.manifest').setLevel(logging.WARNING)
+  logging.getLogger('peewee').setLevel(logging.WARNING)
+  remove_stale_manifests()