This repository has been archived on 2020-03-24. You can view files and clone it, but cannot push or open issues or pull requests.
quay/tools/auditancestry.py

141 lines
No EOL
4.8 KiB
Python

import logging
import json
from data.database import Image, ImageStorage, Repository
from data import model
from app import app
import boto.s3.connection
import boto.s3.key
# Configure root logging before anything else in the script emits records.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# The root level above is DEBUG; raise boto's own threshold to CRITICAL so
# its records do not drown out this script's output.
logging.getLogger('boto').setLevel(logging.CRITICAL)

# Storage object taken from the application config; used below to build
# image file paths and to read ancestry files.
store = app.config['STORAGE']
# All images, selected together with the columns of their storage row and
# their repository row. switch(Image) moves the join context back to Image so
# that Repository is joined from Image rather than from ImageStorage.
query = (
    Image.select(Image, ImageStorage, Repository)
    .join(ImageStorage)
    .switch(Image)
    .join(Repository)
)
# Tallies maintained by the audit loop: images whose ancestry was rewritten
# (bad) and images whose ancestry already had the expected length (good).
good_count = 0
bad_count = 0

# Direct boto handle on the registry bucket, used for key-to-key copies.
s3_conn = boto.s3.connection.S3Connection(
    app.config['AWS_ACCESS_KEY'],
    app.config['AWS_SECRET_KEY'],
)
s3_bucket = s3_conn.get_bucket('quay-registry')
# Path builders for every per-image file that is copied when legacy storage
# is migrated. Each one is called as
#   path(namespace, repository_name, docker_image_id, storage_uuid)
# and the files are copied in the order listed here.
PATHS = [
    store.image_json_path,
    store.image_checksum_path,
    store.image_layer_path,
    store.image_ancestry_path,
    store.image_file_trie_path,
    store.image_file_diffs_path,
]
def _copy_legacy_storage(found, docker_image_id):
    """Create an ImageStorage row for a legacy image and copy its S3 files.

    found is an Image row whose files still live under the legacy
    namespace/repository/docker_image_id paths. Every file named by PATHS is
    copied to the matching path keyed by the new storage uuid.

    Returns the newly created ImageStorage row.

    Raises RuntimeError if any of the legacy files is missing from the bucket.
    """
    legacy_repo = found.repository

    # Resolve every source key before creating the storage row: boto's
    # get_key() returns None for a missing key instead of raising, and failing
    # here keeps a missing file from leaving a half-populated storage behind.
    sources = []
    for path in PATHS:
        old_path = path(legacy_repo.namespace, legacy_repo.name,
                        docker_image_id, None)
        old_path_key = s3_bucket.get_key(old_path)
        if old_path_key is None:
            msg = 'Legacy storage file missing: %s for docker_image_id: %s'
            logger.error(msg, old_path, docker_image_id)
            raise RuntimeError(msg % (old_path, docker_image_id))
        sources.append((path, old_path, old_path_key))

    new_storage = ImageStorage.create(checksum=found.checksum,
                                      created=found.created,
                                      comment=found.comment,
                                      command=found.command,
                                      image_size=found.image_size)
    logger.debug('Migrating data to new storage: %s', new_storage.uuid)

    for path, old_path, old_path_key in sources:
        new_path = path(None, None, None, new_storage.uuid)
        logger.debug('Copying %s -> %s', old_path, new_path)
        # Copy within the bucket that was opened above rather than repeating
        # its name as a second literal.
        old_path_key.copy(s3_bucket.name, new_path, encrypt_key=True,
                          validate_dst_bucket=False)

    return new_storage


def resolve_or_create(repo, docker_image_id, new_ancestry):
    """Return the Image for docker_image_id in repo, creating it if needed.

    Lookup order:
      1. An image with this docker_image_id already in repo is returned as is.
      2. Otherwise, an ImageStorage already attached to any image with this
         docker_image_id is linked to a new Image row in repo.
      3. Otherwise, any Image row with this docker_image_id is treated as
         legacy storage: its files are copied to a fresh ImageStorage and a
         new Image row in repo is pointed at it.

    repo            -- repository row that should contain the image
    docker_image_id -- docker id of the ancestor image to resolve
    new_ancestry    -- ancestors string stored on a newly created Image

    Raises RuntimeError when no image with this docker_image_id exists
    anywhere, or when a legacy file needed for the copy is missing.
    """
    existing = model.get_repo_image(repo.namespace, repo.name, docker_image_id)
    if existing:
        logger.debug('Found existing image: %s, %s', existing.id, docker_image_id)
        return existing

    # we need to find some storage to link it to
    try:
        storage = (ImageStorage
                   .select()
                   .join(Image)
                   .where(Image.docker_image_id == docker_image_id)
                   .get())
        logger.debug('Linking to storage: %s', storage.uuid)
    except ImageStorage.DoesNotExist:
        logger.warning('No storage for ancestor, tring to find it anywhere: %s',
                       docker_image_id)
        try:
            found = Image.get(docker_image_id=docker_image_id)
        except Image.DoesNotExist:
            msg = 'No image available anywhere for storage: %s in namespace: %s'
            logger.error(msg, docker_image_id, repo.namespace)
            # Carry the message on the exception too, so it is still useful
            # if it is ever reported outside of the log.
            raise RuntimeError(msg % (docker_image_id, repo.namespace))

        logger.debug('Found some legacy storage for docker_image_id: %s',
                     docker_image_id)
        storage = _copy_legacy_storage(found, docker_image_id)
        logger.debug('Creating new image from copied legacy storage: %s',
                     storage.uuid)

    created = Image.create(docker_image_id=docker_image_id, repository=repo,
                           storage=storage, ancestors=new_ancestry)
    logger.debug('Created image: %s', created)
    return created
# Images whose ancestry could not be repaired; reported after the audit.
cant_fix = []

for img in query:
    try:
        storage_uuid = img.storage.uuid
        ancestry_storage = store.image_ancestry_path(
            img.repository.namespace,
            img.repository.name,
            img.docker_image_id,
            storage_uuid,
        )

        # Images without a stored ancestry file are neither counted nor fixed.
        if store.exists(ancestry_storage):
            # Drop the first entry of the stored ancestry (presumably the
            # image's own id -- confirm against the registry format) and
            # reverse the rest, giving the order in which the database
            # ancestors string is rebuilt below.
            stored_chain = json.loads(store.get_content(ancestry_storage))
            full_ancestry = list(reversed(stored_chain[1:]))

            # The database value looks like '/1/2/3/': the leading and
            # trailing pieces of the split are empty and are sliced off.
            db_parts = img.ancestors.split('/')[1:-1]
            ancestor_dbids = [int(part) for part in db_parts]

            if len(full_ancestry) == len(ancestor_dbids):
                good_count += 1
            else:
                logger.error('Image has incomplete ancestry: %s, %s, %s, %s',
                             img.id, img.docker_image_id, full_ancestry,
                             ancestor_dbids)

                # Rebuild the ancestors string one ancestor at a time; each
                # resolved (or newly created) ancestor receives the prefix
                # built so far as its own ancestry.
                fixed_ancestry = '/'
                for ancestor in full_ancestry:
                    ancestor_img = resolve_or_create(img.repository, ancestor,
                                                     fixed_ancestry)
                    fixed_ancestry = '%s%s/' % (fixed_ancestry,
                                                ancestor_img.id)

                img.ancestors = fixed_ancestry
                img.save()
                bad_count += 1
    except RuntimeError:
        # resolve_or_create signals an unrecoverable ancestor this way.
        cant_fix.append(img)

    # Progress line after every image, whatever its outcome.
    logger.debug('Bad: %s Good: %s Can\'t Fix: %s', bad_count, good_count,
                 len(cant_fix))
# Final report: one error line for each image collected in cant_fix.
for cant in cant_fix:
    logger.error(
        'Unable to fix %s in repo %s/%s',
        cant.id,
        cant.repository.namespace,
        cant.repository.name,
    )