This repository has been archived on 2020-03-24. You can view files and clone it, but cannot push or open issues or pull requests.
quay/tools/auditancestry.py

150 lines
5 KiB
Python
Raw Normal View History

import logging
import json
from data.database import Image, ImageStorage, Repository
from data import model
from app import app, storage as store
import boto.s3.connection
import boto.s3.key
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.DEBUG)
# Turn off debug logging for boto
logging.getLogger('boto').setLevel(logging.CRITICAL)
query = (Image
.select(Image, ImageStorage, Repository)
.join(ImageStorage)
.switch(Image)
.join(Repository))
bad_count = 0
good_count = 0
s3_conn = boto.s3.connection.S3Connection(app.config['AWS_ACCESS_KEY'],
app.config['AWS_SECRET_KEY'])
s3_bucket = s3_conn.get_bucket('quay-registry')
PATHS = [
store.image_json_path,
store.image_checksum_path,
store.image_layer_path,
store.image_ancestry_path,
store.image_file_trie_path,
store.image_file_diffs_path,
]
def resolve_or_create(repo, docker_image_id, new_ancestry):
existing = model.get_repo_image(repo.namespace, repo.name, docker_image_id)
if existing:
logger.debug('Found existing image: %s, %s', existing.id, docker_image_id)
return existing
else:
# we need to find some storage to link it to
try:
to_link = (ImageStorage
.select()
.join(Image)
.where(Image.docker_image_id == docker_image_id)
.get())
logger.debug('Linking to storage: %s' % to_link.uuid)
created = Image.create(docker_image_id=docker_image_id, repository=repo,
storage=to_link, ancestors=new_ancestry)
logger.debug('Created image: %s' % created)
return created
except ImageStorage.DoesNotExist:
logger.warning('No storage for ancestor, tring to find it anywhere: %s',
docker_image_id)
try:
found = Image.get(docker_image_id=docker_image_id)
logger.debug('Found some legacy storage for docker_image_id: %s',
docker_image_id)
new_storage = ImageStorage.create(checksum=found.checksum,
created=found.created,
comment=found.comment,
command=found.command,
image_size=found.image_size)
logger.debug('Migrating data to new storage: %s' % new_storage.uuid)
for path in PATHS:
old_path = path(found.repository.namespace, found.repository.name,
docker_image_id, None)
new_path = path(None, None, None, new_storage.uuid)
logger.debug('Copying %s -> %s', old_path, new_path)
old_path_key = s3_bucket.get_key(old_path)
old_path_key.copy('quay-registry', new_path, encrypt_key=True,
validate_dst_bucket=False)
logger.debug('Creating new image from copied legacy storage: %s',
new_storage.uuid)
created = Image.create(docker_image_id=docker_image_id,
repository=repo,
storage=new_storage, ancestors=new_ancestry)
logger.debug('Created image: %s' % created)
return created
except Image.DoesNotExist:
msg = 'No image available anywhere for storage: %s in namespace: %s'
logger.error(msg, docker_image_id, repo.namespace)
raise RuntimeError()
def all_ancestors_exist(ancestors):
if not ancestors:
return True
found_count = len(list(Image
.select()
.where(Image.id << ancestors)))
return found_count == len(ancestors)
cant_fix = []
for img in query:
try:
uuid = img.storage.uuid
ancestry_storage = store.image_ancestry_path(img.repository.namespace,
img.repository.name,
img.docker_image_id,
uuid)
if store.exists(ancestry_storage):
full_ancestry = json.loads(store.get_content(ancestry_storage))[1:]
full_ancestry.reverse()
ancestor_dbids = [int(anc_id)
for anc_id in img.ancestors.split('/')[1:-1]]
if len(full_ancestry) != len(ancestor_dbids) or not all_ancestors_exist(ancestor_dbids):
logger.error('Image has incomplete ancestry: %s, %s, %s, %s' %
(img.id, img.docker_image_id, full_ancestry,
ancestor_dbids))
fixed_ancestry = '/'
for ancestor in full_ancestry:
ancestor_img = resolve_or_create(img.repository, ancestor,
fixed_ancestry)
fixed_ancestry += str(ancestor_img.id) + '/'
img.ancestors = fixed_ancestry
img.save()
bad_count += 1
else:
good_count += 1
except RuntimeError:
cant_fix.append(img)
logger.debug('Bad: %s Good: %s Can\'t Fix: %s', bad_count, good_count,
len(cant_fix))
for cant in cant_fix:
logger.error('Unable to fix %s in repo %s/%s', cant.id,
cant.repository.namespace, cant.repository.name)