Remove an extraneous slash in image file path computations. Add a script that will audit and fix image ancestry.
commit 177a21e996 (parent f339160ab9)
2 changed files with 145 additions and 7 deletions
@@ -38,33 +38,33 @@ class Storage(object):
 
   def image_json_path(self, namespace, repository, image_id, storage_uuid):
     base_path = self.image_path(namespace, repository, image_id, storage_uuid)
-    return '{0}/json'.format(base_path)
+    return '{0}json'.format(base_path)
 
   def image_mark_path(self, namespace, repository, image_id, storage_uuid):
     base_path = self.image_path(namespace, repository, image_id, storage_uuid)
-    return '{0}/_inprogress'.format(base_path)
+    return '{0}_inprogress'.format(base_path)
 
   def image_checksum_path(self, namespace, repository, image_id, storage_uuid):
     base_path = self.image_path(namespace, repository, image_id, storage_uuid)
-    return '{0}/_checksum'.format(base_path)
+    return '{0}_checksum'.format(base_path)
 
   def image_layer_path(self, namespace, repository, image_id, storage_uuid):
     base_path = self.image_path(namespace, repository, image_id, storage_uuid)
-    return '{0}/layer'.format(base_path)
+    return '{0}layer'.format(base_path)
 
   def image_ancestry_path(self, namespace, repository, image_id, storage_uuid):
     base_path = self.image_path(namespace, repository, image_id, storage_uuid)
-    return '{0}/ancestry'.format(base_path)
+    return '{0}ancestry'.format(base_path)
 
   def image_file_trie_path(self, namespace, repository, image_id,
                            storage_uuid):
     base_path = self.image_path(namespace, repository, image_id, storage_uuid)
-    return '{0}/files.trie'.format(base_path)
+    return '{0}files.trie'.format(base_path)
 
   def image_file_diffs_path(self, namespace, repository, image_id,
                             storage_uuid):
     base_path = self.image_path(namespace, repository, image_id, storage_uuid)
-    return '{0}/diffs.json'.format(base_path)
+    return '{0}diffs.json'.format(base_path)
 
   def get_direct_download_url(self, path, expires_in=60):
     return None
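Each path helper above appends a suffix to base_path, and image_path evidently now returns a prefix that already ends with '/', which is what makes the leading slash extraneous. A minimal sketch of that assumption (image_path itself is outside this hunk, so its exact shape here is hypothetical):

# Hypothetical sketch, not code from this commit. Assumption: image_path
# already attaches the trailing slash to the prefix it returns.
def image_path(self, namespace, repository, image_id, storage_uuid):
  return 'images/{0}/'.format(storage_uuid or image_id)

# Before the fix: '{0}/json'.format('images/abcd/') -> 'images/abcd//json'
# After the fix:  '{0}json'.format('images/abcd/')  -> 'images/abcd/json'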
tools/auditancestry.py (new file, +138 lines)
@@ -0,0 +1,138 @@
+import logging
+import json
+
+from data.database import Image, ImageStorage, Repository
+from data import model
+from app import app
+
+import boto.s3.connection
+import boto.s3.key
+
+
+store = app.config['STORAGE']
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.DEBUG)
+
+# Turn off debug logging for boto
+logging.getLogger('boto').setLevel(logging.CRITICAL)
+
+
+# Every image, joined with its storage record and its owning repository.
+query = (Image
+         .select(Image, ImageStorage, Repository)
+         .join(ImageStorage)
+         .switch(Image)
+         .join(Repository))
+
+
+bad_count = 0
+good_count = 0
+
+s3_conn = boto.s3.connection.S3Connection(app.config['AWS_ACCESS_KEY'],
+                                          app.config['AWS_SECRET_KEY'])
+s3_bucket = s3_conn.get_bucket('quay-registry')
+
+PATHS = [
+  store.image_json_path,
+  store.image_checksum_path,
+  store.image_layer_path,
+  store.image_ancestry_path,
+  store.image_file_trie_path,
+  store.image_file_diffs_path,
+]
+
+# Resolve an Image row for this docker_image_id within the repo, creating one
+# and relinking or migrating its storage when necessary.
+def resolve_or_create(repo, docker_image_id, new_ancestry):
+  existing = model.get_repo_image(repo.namespace, repo.name, docker_image_id)
+  if existing:
+    logger.debug('Found existing image: %s, %s', existing.id, docker_image_id)
+    return existing
+  else:
+    # we need to find some storage to link it to
+    try:
+      to_link = (ImageStorage
+                 .select()
+                 .join(Image)
+                 .where(Image.docker_image_id == docker_image_id)
+                 .get())
+      logger.debug('Linking to storage: %s' % to_link.uuid)
+      created = Image.create(docker_image_id=docker_image_id, repository=repo,
+                             storage=to_link, ancestors=new_ancestry)
+      logger.debug('Created image: %s' % created)
+      return created
+    except ImageStorage.DoesNotExist:
+      logger.warning('No storage for ancestor, trying to find it anywhere!')
+      try:
+        found = Image.get(docker_image_id=docker_image_id)
+        logger.debug('Found some legacy storage')
+        new_storage = ImageStorage.create(checksum=found.checksum,
+                                          created=found.created,
+                                          comment=found.comment,
+                                          command=found.command,
+                                          image_size=found.image_size)
+
+        logger.debug('Migrating data to new storage: %s' % new_storage.uuid)
+
+        for path in PATHS:
+          old_path = path(found.repository.namespace, found.repository.name,
+                          docker_image_id, None)
+          new_path = path(None, None, None, new_storage.uuid)
+          logger.debug('Copying %s -> %s', old_path, new_path)
+
+          old_path_key = s3_bucket.get_key(old_path)
+          old_path_key.copy('quay-registry', new_path, encrypt_key=True,
+                            validate_dst_bucket=False)
+
+        logger.debug('Creating new image from copied legacy storage: %s',
+                     new_storage.uuid)
+        created = Image.create(docker_image_id=docker_image_id,
+                               repository=repo,
+                               storage=new_storage, ancestors=new_ancestry)
+        logger.debug('Created image: %s' % created)
+        return created
+
+      except Image.DoesNotExist:
+        logger.error('No image available anywhere for storage.')
+        raise RuntimeError('No image available anywhere for storage.')
+
+# Audit loop: compare each image's ancestry file in storage against the
+# database's ancestors column, and rebuild the column when they disagree.
+cant_fix = []
+for img in query:
+  try:
+    uuid = img.storage.uuid
+    ancestry_storage = store.image_ancestry_path(img.repository.namespace,
+                                                 img.repository.name,
+                                                 img.docker_image_id,
+                                                 uuid)
+    if store.exists(ancestry_storage):
+      full_ancestry = json.loads(store.get_content(ancestry_storage))[1:]
+      full_ancestry.reverse()
+
+      ancestor_dbids = [int(anc_id)
+                        for anc_id in img.ancestors.split('/')[1:-1]]
+
+      if len(full_ancestry) != len(ancestor_dbids):
+        logger.error('Image has incomplete ancestry: %s, %s, %s, %s' %
+                     (img.id, img.docker_image_id, full_ancestry,
+                      ancestor_dbids))
+
+        fixed_ancestry = '/'
+        for ancestor in full_ancestry:
+          ancestor_img = resolve_or_create(img.repository, ancestor,
+                                           fixed_ancestry)
+          fixed_ancestry += str(ancestor_img.id) + '/'
+
+        img.ancestors = fixed_ancestry
+        img.save()
+
+        bad_count += 1
+      else:
+        good_count += 1
+
+  except RuntimeError:
+    cant_fix.append(img)
+
+logger.debug('Bad: %s Good: %s Can\'t Fix: %s', bad_count, good_count,
+             len(cant_fix))
+
+for cant in cant_fix:
+  logger.error('Unable to fix %s in repo %s/%s', cant.id,
+               cant.repository.namespace, cant.repository.name)
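For context on what the audit loop reconciles, here is a small sketch of the two ancestry representations. These formats are inferred from how the script parses them, so treat the exact shapes as assumptions:

# Inferred from the parsing above, not documented elsewhere in the commit.
# Registry-side ancestry file: a JSON list of docker image ids, the image
# itself first and the root image last.
json_ancestry = ['img', 'parent', 'root']
lineage = json_ancestry[1:]  # drop the image itself -> ['parent', 'root']
lineage.reverse()            # oldest (root) first   -> ['root', 'parent']

# Database ancestors column: '/'-delimited database ids, root first, with
# leading and trailing slashes.
ancestor_dbids = [int(anc_id) for anc_id in '/3/7/'.split('/')[1:-1]]  # [3, 7]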
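A plausible invocation, assuming the script is run from the repository root so that app and data resolve as imports (the commit itself does not document one):

python tools/auditancestry.py

Note that it expects AWS_ACCESS_KEY and AWS_SECRET_KEY in the app config and write access to the quay-registry S3 bucket, since fixing an ancestor can copy legacy data to a new storage path as a side effect.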