"""Audit image ancestry and repair images whose ancestor chain is incomplete.

For every image, the ancestry JSON held in storage is compared with the
``ancestors`` column in the database. When the two disagree in length, or when
some of the referenced ancestor rows no longer exist, the ``ancestors`` value
is rebuilt from the stored ancestry, creating any missing ancestor image rows
in the image's repository along the way.

This is a script: all of the work happens at import/run time.
"""
import logging
import json

from data.database import Image, ImageStorage, Repository
from data import model
from app import app, storage as store

import boto.s3.connection
import boto.s3.key


logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.DEBUG)

# Turn off debug logging for boto
logging.getLogger('boto').setLevel(logging.CRITICAL)


# Every image joined with its storage and repository, so that the audit loop
# below can read img.storage and img.repository from the same query.
query = (Image
         .select(Image, ImageStorage, Repository)
         .join(ImageStorage)
         .switch(Image)
         .join(Repository))


bad_count = 0
good_count = 0

# Bucket that holds both the legacy (per-repository) and the new
# (per-storage-uuid) copies of the image files.
BUCKET_NAME = 'quay-registry'

s3_conn = boto.s3.connection.S3Connection(app.config['AWS_ACCESS_KEY'],
                                          app.config['AWS_SECRET_KEY'])
s3_bucket = s3_conn.get_bucket(BUCKET_NAME)

# Path builders for every file that is copied when legacy storage is migrated.
# Each is called as path(namespace, repository_name, docker_image_id, uuid).
PATHS = [
    store.image_json_path,
    store.image_checksum_path,
    store.image_layer_path,
    store.image_ancestry_path,
    store.image_file_trie_path,
    store.image_file_diffs_path,
]


def _copy_legacy_files(legacy_image, docker_image_id, new_storage):
    """Copy the files of ``legacy_image`` to the location of ``new_storage``."""
    legacy_repo = legacy_image.repository
    for path in PATHS:
        old_path = path(legacy_repo.namespace, legacy_repo.name,
                        docker_image_id, None)
        new_path = path(None, None, None, new_storage.uuid)
        logger.debug('Copying %s -> %s', old_path, new_path)

        old_path_key = s3_bucket.get_key(old_path)
        if old_path_key is None:
            # get_key() returns None for a key that does not exist. Not every
            # legacy image necessarily has every file, so mirror what is
            # there instead of crashing the whole audit on `None.copy`.
            logger.warning('Legacy file does not exist, skipping copy: %s',
                           old_path)
            continue

        old_path_key.copy(BUCKET_NAME, new_path, encrypt_key=True,
                          validate_dst_bucket=False)


def _migrate_legacy_storage(repo, docker_image_id):
    """Create a new ImageStorage from any image row with ``docker_image_id``.

    Raises RuntimeError when no image row with that docker image id exists.
    """
    try:
        found = Image.get(docker_image_id=docker_image_id)
    except Image.DoesNotExist:
        msg = 'No image available anywhere for storage: %s in namespace: %s'
        logger.error(msg, docker_image_id, repo.namespace)
        raise RuntimeError(msg % (docker_image_id, repo.namespace))

    logger.debug('Found some legacy storage for docker_image_id: %s',
                 docker_image_id)
    new_storage = ImageStorage.create(checksum=found.checksum,
                                      created=found.created,
                                      comment=found.comment,
                                      command=found.command,
                                      image_size=found.image_size)

    logger.debug('Migrating data to new storage: %s', new_storage.uuid)
    _copy_legacy_files(found, docker_image_id, new_storage)

    logger.debug('Creating new image from copied legacy storage: %s',
                 new_storage.uuid)
    return new_storage


def resolve_or_create(repo, docker_image_id, new_ancestry):
    """Return the image for ``docker_image_id`` in ``repo``, creating it if needed.

    Lookup order:
      1. An image that already exists in ``repo`` is returned unchanged.
      2. Otherwise a new image row is created in ``repo`` that links to an
         ImageStorage already joined to some image with the same docker
         image id.
      3. Otherwise a new ImageStorage is created from an image row with that
         docker image id, its files are copied in S3, and the new image row
         links to that storage.

    Args:
      repo: repository row the image must belong to.
      docker_image_id: docker id of the image to look up or create.
      new_ancestry: value stored in the ``ancestors`` column of a newly
        created image (a '/'-delimited string of database ids).

    Returns:
      The existing or newly created Image row.

    Raises:
      RuntimeError: if no image with ``docker_image_id`` exists anywhere.
    """
    existing = model.get_repo_image(repo.namespace, repo.name, docker_image_id)
    if existing:
        logger.debug('Found existing image: %s, %s', existing.id,
                     docker_image_id)
        return existing

    # we need to find some storage to link it to
    try:
        to_link = (ImageStorage
                   .select()
                   .join(Image)
                   .where(Image.docker_image_id == docker_image_id)
                   .get())
    except ImageStorage.DoesNotExist:
        logger.warning('No storage for ancestor, tring to find it anywhere: %s',
                       docker_image_id)
        to_link = _migrate_legacy_storage(repo, docker_image_id)
    else:
        logger.debug('Linking to storage: %s', to_link.uuid)

    created = Image.create(docker_image_id=docker_image_id, repository=repo,
                           storage=to_link, ancestors=new_ancestry)
    logger.debug('Created image: %s', created)
    return created


def all_ancestors_exist(ancestors):
    """Return True when every database id in ``ancestors`` has an Image row.

    An empty list of ancestors trivially passes.
    """
    if not ancestors:
        return True

    # Let the database count the matches instead of loading every row.
    found_count = Image.select().where(Image.id << ancestors).count()
    return found_count == len(ancestors)


cant_fix = []
for img in query:
    try:
        storage_uuid = img.storage.uuid
        ancestry_storage = store.image_ancestry_path(img.repository.namespace,
                                                     img.repository.name,
                                                     img.docker_image_id,
                                                     storage_uuid)
        if not store.exists(ancestry_storage):
            # Nothing to compare against; counted as neither good nor bad.
            continue

        # Drop the first entry (presumably the image itself -- confirm against
        # the ancestry file format) and reverse the rest, so that each
        # ancestor is resolved after the ones that make up its own prefix.
        full_ancestry = json.loads(store.get_content(ancestry_storage))[1:]
        full_ancestry.reverse()

        # The column looks like '/1/2/3/'; strip the empty first/last pieces.
        ancestor_dbids = [int(anc_id)
                          for anc_id in img.ancestors.split('/')[1:-1]]

        if (len(full_ancestry) == len(ancestor_dbids) and
                all_ancestors_exist(ancestor_dbids)):
            good_count += 1
            continue

        logger.error('Image has incomplete ancestry: %s, %s, %s, %s',
                     img.id, img.docker_image_id, full_ancestry,
                     ancestor_dbids)

        fixed_ancestry = '/'
        for ancestor in full_ancestry:
            ancestor_img = resolve_or_create(img.repository, ancestor,
                                             fixed_ancestry)
            fixed_ancestry += str(ancestor_img.id) + '/'

        img.ancestors = fixed_ancestry
        img.save()

        bad_count += 1

    except RuntimeError:
        # Raised by resolve_or_create when an ancestor cannot be found
        # anywhere; remember the image and report it at the end.
        cant_fix.append(img)


logger.debug('Bad: %s Good: %s Can\'t Fix: %s', bad_count, good_count,
             len(cant_fix))

for cant in cant_fix:
    logger.error('Unable to fix %s in repo %s/%s', cant.id,
                 cant.repository.namespace, cant.repository.name)