Delete bad manifests from the DB

This commit is contained in:
Jake Moshenko 2016-02-12 17:55:33 -05:00
parent babf61fdaa
commit 6e05920d6b
2 changed files with 56 additions and 2 deletions

View file

@ -63,7 +63,7 @@ _SCHEMA_VER = 'schemaVersion'
class SignedManifest(object): class SignedManifest(object):
def __init__(self, manifest_bytes): def __init__(self, manifest_bytes, validate=True):
self._bytes = manifest_bytes self._bytes = manifest_bytes
self._parsed = json.loads(manifest_bytes) self._parsed = json.loads(manifest_bytes)
@ -79,6 +79,7 @@ class SignedManifest(object):
else: else:
raise ValueError('repo_name has too many or too few pieces') raise ValueError('repo_name has too many or too few pieces')
if validate:
self._validate() self._validate()
def _validate(self): def _validate(self):

53
tools/auditmanifests.py Normal file
View file

@ -0,0 +1,53 @@
import logging
from peewee import fn
from app import app
from util.migrate.allocator import yield_random_entries
from endpoints.v2.manifest import SignedManifest
from data.database import TagManifest
from data import model
# Module-level logger used for all audit progress and warning output below.
logger = logging.getLogger(__name__)
# Emit a running "Removed x/y manifests" progress line after every this-many manifests checked.
PRINT_EVERY = 10
# Passed to yield_random_entries as its batch-size argument.
BATCH_SIZE = 100
def batch_query():
    """Return an unfiltered select over TagManifest, used as the base query for batching."""
    all_manifests = TagManifest.select()
    return all_manifests
def remove_stale_manifests():
    """Audit every TagManifest row and delete those that reference a missing layer blob.

    Each manifest's stored JSON is parsed without validation, and every layer
    digest it lists is looked up in the manifest's own namespace/repository.
    If any lookup raises ``model.BlobDoesNotExist`` the manifest row is deleted
    and counted as problematic. Progress is logged every ``PRINT_EVERY``
    manifests, and a final summary is logged when the scan completes.
    """
    # Highest manifest id currently in the table; handed to the batch iterator.
    # NOTE(review): presumably used as the upper bound of the id range - confirm
    # against yield_random_entries.
    max_manifest_id = TagManifest.select(fn.Max(TagManifest.id)).scalar()

    problematic = 0
    checked = 0

    # The second element of each yielded pair is not needed here.
    for manifest_row, _ in yield_random_entries(batch_query, TagManifest.id, BATCH_SIZE,
                                                max_manifest_id):
        checked += 1

        # validate=False: the rows being audited are the ones expected to fail
        # validation, so only parse them.
        parsed = SignedManifest(manifest_row.json_data, validate=False)
        logger.debug('Auditing manifest with id: %s for %s/%s', manifest_row.digest,
                     parsed.namespace, parsed.repo_name)

        try:
            for layer_mdata in parsed.layers:
                digest = layer_mdata.digest
                # Only existence matters. The result must NOT be bound to the
                # manifest row's name, otherwise a miss on a later layer would
                # delete the previously found blob instead of the manifest.
                model.blob.get_repo_blob_by_digest(parsed.namespace, parsed.repo_name, digest)
        except model.BlobDoesNotExist:
            logger.warning('Manifest missing layer: %s, deleting', digest)
            manifest_row.delete_instance()
            problematic += 1

        if checked % PRINT_EVERY == 0:
            logger.info('Removed %s/%s manifests', problematic, checked)

    logger.info('Final Summary: %s/%s manifests removed', problematic, checked)
if __name__ == '__main__':
    # Show this script's INFO-level progress lines on the root handler.
    logging.basicConfig(level=logging.INFO)

    # Raise the threshold of these two loggers to WARNING.
    for quiet_logger_name in ('endpoints.v2.manifest', 'peewee'):
        logging.getLogger(quiet_logger_name).setLevel(logging.WARNING)

    remove_stale_manifests()