class Storage(object):
  """Base class for image-storage backends (path-helper chunk).

  NOTE(review): only this portion of the class is visible here;
  ``image_path`` is defined elsewhere in the class.  Every helper below
  appends an artifact name directly onto the image's base path, so
  ``image_path`` presumably returns a prefix that already ends with a
  path separator -- confirm against its definition.
  """

  def image_json_path(self, namespace, repository, image_id, storage_uuid):
    """Location of the image's JSON metadata document."""
    return '{0}json'.format(
        self.image_path(namespace, repository, image_id, storage_uuid))

  def image_mark_path(self, namespace, repository, image_id, storage_uuid):
    """Location of the marker present while an upload is in progress."""
    return '{0}_inprogress'.format(
        self.image_path(namespace, repository, image_id, storage_uuid))

  def image_checksum_path(self, namespace, repository, image_id, storage_uuid):
    """Location of the stored layer checksum."""
    return '{0}_checksum'.format(
        self.image_path(namespace, repository, image_id, storage_uuid))

  def image_layer_path(self, namespace, repository, image_id, storage_uuid):
    """Location of the layer blob itself."""
    return '{0}layer'.format(
        self.image_path(namespace, repository, image_id, storage_uuid))

  def image_ancestry_path(self, namespace, repository, image_id, storage_uuid):
    """Location of the JSON document listing the image's ancestors."""
    return '{0}ancestry'.format(
        self.image_path(namespace, repository, image_id, storage_uuid))

  def image_file_trie_path(self, namespace, repository, image_id, storage_uuid):
    """Location of the layer's 'files.trie' index file."""
    return '{0}files.trie'.format(
        self.image_path(namespace, repository, image_id, storage_uuid))

  def image_file_diffs_path(self, namespace, repository, image_id, storage_uuid):
    """Location of the layer's 'diffs.json' file listing."""
    return '{0}diffs.json'.format(
        self.image_path(namespace, repository, image_id, storage_uuid))

  def get_direct_download_url(self, path, expires_in=60):
    """Return a signed direct-download URL for *path*, or None when the
    backend does not support direct downloads (the default)."""
    return None
"""One-shot audit tool for image ancestry integrity.

For every Image row, compare the ancestry file held in storage against the
ancestry string cached on the row.  When the two disagree in length, rebuild
the ancestry by resolving (or creating) an Image row for every ancestor.
Legacy ancestors whose data still lives under the old per-repository layout
are migrated to a fresh ImageStorage by copying each artifact within S3.

NOTE(review): this is a maintenance script with module-level side effects
(DB queries, an S3 connection, logging reconfiguration) by design.
"""

import logging
import json

from data.database import Image, ImageStorage, Repository
from data import model
from app import app

import boto.s3.connection
import boto.s3.key


store = app.config['STORAGE']
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.DEBUG)

# Turn off debug logging for boto
logging.getLogger('boto').setLevel(logging.CRITICAL)

# Bucket holding all registry data; previously hard-coded in two places.
S3_BUCKET_NAME = 'quay-registry'


query = (Image
         .select(Image, ImageStorage, Repository)
         .join(ImageStorage)
         .switch(Image)
         .join(Repository))


bad_count = 0
good_count = 0

s3_conn = boto.s3.connection.S3Connection(app.config['AWS_ACCESS_KEY'],
                                          app.config['AWS_SECRET_KEY'])
s3_bucket = s3_conn.get_bucket(S3_BUCKET_NAME)

# One path helper per artifact that must be migrated alongside an image.
PATHS = [
  store.image_json_path,
  store.image_checksum_path,
  store.image_layer_path,
  store.image_ancestry_path,
  store.image_file_trie_path,
  store.image_file_diffs_path,
]


def resolve_or_create(repo, docker_image_id, new_ancestry):
  """Return the Image row for docker_image_id in repo, creating it if needed.

  Resolution order:
    1. an existing Image row in the repository;
    2. a new row linked to an ImageStorage that already holds that docker id;
    3. a new row plus a new ImageStorage populated by copying the legacy
       per-repository artifacts within S3.

  Raises RuntimeError when the image data cannot be found anywhere.
  """
  existing = model.get_repo_image(repo.namespace, repo.name, docker_image_id)
  if existing:
    logger.debug('Found existing image: %s, %s', existing.id, docker_image_id)
    return existing

  # We need to find some storage to link it to.
  try:
    to_link = (ImageStorage
               .select()
               .join(Image)
               .where(Image.docker_image_id == docker_image_id)
               .get())
    logger.debug('Linking to storage: %s', to_link.uuid)
    created = Image.create(docker_image_id=docker_image_id, repository=repo,
                           storage=to_link, ancestors=new_ancestry)
    logger.debug('Created image: %s', created)
    return created
  except ImageStorage.DoesNotExist:
    logger.warning('No storage for ancestor, trying to find it anywhere!')
    try:
      found = Image.get(docker_image_id=docker_image_id)
      logger.debug('Found some legacy storage')
      new_storage = ImageStorage.create(checksum=found.checksum,
                                        created=found.created,
                                        comment=found.comment,
                                        command=found.command,
                                        image_size=found.image_size)

      logger.debug('Migrating data to new storage: %s', new_storage.uuid)

      for path in PATHS:
        # Old layout keys on namespace/repo; new layout keys on storage uuid.
        old_path = path(found.repository.namespace, found.repository.name,
                        docker_image_id, None)
        new_path = path(None, None, None, new_storage.uuid)
        logger.debug('Copying %s -> %s', old_path, new_path)

        old_path_key = s3_bucket.get_key(old_path)
        if old_path_key is None:
          # The legacy artifact is gone; surface this as "can't fix" rather
          # than crashing the whole audit with an AttributeError.
          raise RuntimeError('Missing legacy artifact: %s' % old_path)
        old_path_key.copy(S3_BUCKET_NAME, new_path, encrypt_key=True,
                          validate_dst_bucket=False)

      logger.debug('Creating new image from copied legacy storage: %s',
                   new_storage.uuid)
      created = Image.create(docker_image_id=docker_image_id,
                             repository=repo,
                             storage=new_storage, ancestors=new_ancestry)
      logger.debug('Created image: %s', created)
      return created

    except Image.DoesNotExist:
      logger.error('No image available anywhere for storage.')
      raise RuntimeError('No image available anywhere for storage.')


cant_fix = []
for img in query:
  try:
    uuid = img.storage.uuid
    ancestry_storage = store.image_ancestry_path(img.repository.namespace,
                                                 img.repository.name,
                                                 img.docker_image_id,
                                                 uuid)
    if store.exists(ancestry_storage):
      # Drop the head of the stored ancestry list (the image itself) and
      # reverse so the order mirrors the DB's oldest-first ancestry string.
      full_ancestry = json.loads(store.get_content(ancestry_storage))[1:]
      full_ancestry.reverse()

      # DB ancestry is stored as '/id1/id2/.../'; extract the numeric ids.
      ancestor_dbids = [int(anc_id)
                        for anc_id in img.ancestors.split('/')[1:-1]]

      if len(full_ancestry) != len(ancestor_dbids):
        logger.error('Image has incomplete ancestry: %s, %s, %s, %s',
                     img.id, img.docker_image_id, full_ancestry,
                     ancestor_dbids)

        # Rebuild the ancestry string one ancestor at a time; each ancestor
        # is created with the (partial) ancestry accumulated so far.
        fixed_ancestry = '/'
        for ancestor in full_ancestry:
          ancestor_img = resolve_or_create(img.repository, ancestor,
                                           fixed_ancestry)
          fixed_ancestry += str(ancestor_img.id) + '/'

        img.ancestors = fixed_ancestry
        img.save()

        bad_count += 1
      else:
        good_count += 1

  except RuntimeError:
    # resolve_or_create could not locate the ancestor's data anywhere.
    cant_fix.append(img)

  logger.debug('Bad: %s Good: %s Can\'t Fix: %s', bad_count, good_count,
               len(cant_fix))

for cant in cant_fix:
  logger.error('Unable to fix %s in repo %s/%s', cant.id,
               cant.repository.namespace, cant.repository.name)