Remove an extraneous slash in image file path computations. Add a script that will audit and fix image ancestry.

This commit is contained in:
jakedt 2014-02-21 12:05:03 -05:00
parent f339160ab9
commit 177a21e996
2 changed files with 145 additions and 7 deletions

View file

@ -38,33 +38,33 @@ class Storage(object):
def image_json_path(self, namespace, repository, image_id, storage_uuid):
    """Return the storage path of the image's ``json`` file.

    The file name is appended directly to the value returned by
    ``self.image_path`` without inserting a separator.
    NOTE(review): this presumes ``image_path`` already ends with ``/``;
    confirm against its implementation.
    """
    base_path = self.image_path(namespace, repository, image_id, storage_uuid)
    return '{0}json'.format(base_path)
def image_mark_path(self, namespace, repository, image_id, storage_uuid):
    """Return the storage path of the image's ``_inprogress`` marker file.

    The file name is appended directly to the value returned by
    ``self.image_path`` without inserting a separator.
    NOTE(review): this presumes ``image_path`` already ends with ``/``;
    confirm against its implementation.
    """
    base_path = self.image_path(namespace, repository, image_id, storage_uuid)
    return '{0}_inprogress'.format(base_path)
def image_checksum_path(self, namespace, repository, image_id, storage_uuid):
    """Return the storage path of the image's ``_checksum`` file.

    The file name is appended directly to the value returned by
    ``self.image_path`` without inserting a separator.
    NOTE(review): this presumes ``image_path`` already ends with ``/``;
    confirm against its implementation.
    """
    base_path = self.image_path(namespace, repository, image_id, storage_uuid)
    return '{0}_checksum'.format(base_path)
def image_layer_path(self, namespace, repository, image_id, storage_uuid):
    """Return the storage path of the image's ``layer`` file.

    The file name is appended directly to the value returned by
    ``self.image_path`` without inserting a separator.
    NOTE(review): this presumes ``image_path`` already ends with ``/``;
    confirm against its implementation.
    """
    base_path = self.image_path(namespace, repository, image_id, storage_uuid)
    return '{0}layer'.format(base_path)
def image_ancestry_path(self, namespace, repository, image_id, storage_uuid):
    """Return the storage path of the image's ``ancestry`` file.

    The file name is appended directly to the value returned by
    ``self.image_path`` without inserting a separator.
    NOTE(review): this presumes ``image_path`` already ends with ``/``;
    confirm against its implementation.
    """
    base_path = self.image_path(namespace, repository, image_id, storage_uuid)
    return '{0}ancestry'.format(base_path)
def image_file_trie_path(self, namespace, repository, image_id,
                         storage_uuid):
    """Return the storage path of the image's ``files.trie`` file.

    The file name is appended directly to the value returned by
    ``self.image_path`` without inserting a separator.
    NOTE(review): this presumes ``image_path`` already ends with ``/``;
    confirm against its implementation.
    """
    base_path = self.image_path(namespace, repository, image_id, storage_uuid)
    return '{0}files.trie'.format(base_path)
def image_file_diffs_path(self, namespace, repository, image_id,
                          storage_uuid):
    """Return the storage path of the image's ``diffs.json`` file.

    The file name is appended directly to the value returned by
    ``self.image_path`` without inserting a separator.
    NOTE(review): this presumes ``image_path`` already ends with ``/``;
    confirm against its implementation.
    """
    base_path = self.image_path(namespace, repository, image_id, storage_uuid)
    return '{0}diffs.json'.format(base_path)
def get_direct_download_url(self, path, expires_in=60):
    """Return a direct download URL for ``path``, or None when unavailable.

    This implementation never produces a URL: both ``path`` and
    ``expires_in`` are ignored and None is always returned.
    """
    return None

138
tools/auditancestry.py Normal file
View file

@ -0,0 +1,138 @@
import logging
import json
from data.database import Image, ImageStorage, Repository
from data import model
from app import app
import boto.s3.connection
import boto.s3.key
# Storage driver taken from the application config; its path helpers are
# used below to locate each image's files.
store = app.config['STORAGE']

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.DEBUG)

# Turn off debug logging for boto
logging.getLogger('boto').setLevel(logging.CRITICAL)

# Image rows joined to both their storage row and their repository row.
query = Image.select(Image, ImageStorage, Repository)
query = query.join(ImageStorage).switch(Image).join(Repository)

# Running totals reported once the audit has finished.
bad_count = 0
good_count = 0

# Direct S3 handle, used for copying legacy image files between keys.
s3_conn = boto.s3.connection.S3Connection(
    app.config['AWS_ACCESS_KEY'],
    app.config['AWS_SECRET_KEY'],
)
s3_bucket = s3_conn.get_bucket('quay-registry')

# Path helpers for every per-image file that is copied when legacy storage
# is migrated to a new storage location.
PATHS = [
    store.image_json_path,
    store.image_checksum_path,
    store.image_layer_path,
    store.image_ancestry_path,
    store.image_file_trie_path,
    store.image_file_diffs_path,
]
def _copy_legacy_files(found, docker_image_id, new_storage):
    """Copy every file in PATHS from a legacy image's keys to new storage.

    Raises:
        RuntimeError: if one of the legacy keys does not exist in the bucket.
    """
    for path in PATHS:
        old_path = path(found.repository.namespace, found.repository.name,
                        docker_image_id, None)
        new_path = path(None, None, None, new_storage.uuid)
        logger.debug('Copying %s -> %s', old_path, new_path)

        old_path_key = s3_bucket.get_key(old_path)
        if old_path_key is None:
            # get_key() returns None for a missing key; fail with the error
            # type the audit loop already handles instead of crashing the
            # whole run with an AttributeError on None.copy.
            # NOTE(review): files copied before this point, and the new
            # storage row, are left in place.
            logger.error('Legacy file missing from storage: %s', old_path)
            raise RuntimeError('Legacy file missing from storage: %s' %
                               old_path)

        old_path_key.copy('quay-registry', new_path, encrypt_key=True,
                          validate_dst_bucket=False)


def resolve_or_create(repo, docker_image_id, new_ancestry):
    """Return the image for ``docker_image_id`` in ``repo``, creating it if needed.

    Resolution order:
      1. An image already present in ``repo`` with this docker image id is
         returned unchanged.
      2. Otherwise, storage linked to any image with this docker image id is
         reused for a newly created image row in ``repo``.
      3. Otherwise, any image row with this docker image id is treated as
         legacy: a new storage row is created from its metadata, each file in
         PATHS is copied to the new storage location, and a new image row is
         created in ``repo`` pointing at that storage.

    Args:
        repo: repository row the image must belong to.
        docker_image_id: docker id of the ancestor image to resolve.
        new_ancestry: ancestors string stored on any newly created image.

    Returns:
        The existing or newly created image row.

    Raises:
        RuntimeError: if no image with this docker image id exists anywhere,
            or a legacy file that should be copied is missing from S3.
    """
    existing = model.get_repo_image(repo.namespace, repo.name, docker_image_id)
    if existing:
        logger.debug('Found existing image: %s, %s', existing.id,
                     docker_image_id)
        return existing

    # we need to find some storage to link it to
    try:
        to_link = (ImageStorage
                   .select()
                   .join(Image)
                   .where(Image.docker_image_id == docker_image_id)
                   .get())
        logger.debug('Linking to storage: %s', to_link.uuid)
        created = Image.create(docker_image_id=docker_image_id,
                               repository=repo, storage=to_link,
                               ancestors=new_ancestry)
        logger.debug('Created image: %s', created)
        return created
    except ImageStorage.DoesNotExist:
        logger.warning('No storage for ancestor, tring to find it anywhere!')
        try:
            found = Image.get(docker_image_id=docker_image_id)
            logger.debug('Found some legacy storage')
            new_storage = ImageStorage.create(checksum=found.checksum,
                                              created=found.created,
                                              comment=found.comment,
                                              command=found.command,
                                              image_size=found.image_size)
            logger.debug('Migrating data to new storage: %s',
                         new_storage.uuid)

            _copy_legacy_files(found, docker_image_id, new_storage)

            logger.debug('Creating new image from copied legacy storage: %s',
                         new_storage.uuid)
            created = Image.create(docker_image_id=docker_image_id,
                                   repository=repo,
                                   storage=new_storage,
                                   ancestors=new_ancestry)
            logger.debug('Created image: %s', created)
            return created
        except Image.DoesNotExist:
            logger.error('No image available anywhere for storage.')
            raise RuntimeError('No image available anywhere for storage.')
# Images whose ancestry could not be repaired because an ancestor could not
# be resolved (resolve_or_create raised RuntimeError).
cant_fix = []

for img in query:
    try:
        storage_uuid = img.storage.uuid
        owner = img.repository
        ancestry_storage = store.image_ancestry_path(owner.namespace,
                                                     owner.name,
                                                     img.docker_image_id,
                                                     storage_uuid)

        # Images without a stored ancestry file are counted as neither good
        # nor bad.
        if not store.exists(ancestry_storage):
            continue

        # Drop the first entry of the stored list, then reverse it so it can
        # be compared with (and rebuilt into) the database ancestors string.
        # NOTE(review): presumably the first entry is the image itself and
        # the reversed order is root-first; confirm against the writer.
        full_ancestry = json.loads(store.get_content(ancestry_storage))[1:]
        full_ancestry.reverse()

        # The database form is '/id/id/.../'; strip the empty edge pieces.
        id_pieces = img.ancestors.split('/')[1:-1]
        ancestor_dbids = [int(piece) for piece in id_pieces]

        # Only the lengths are compared, not the individual ancestors.
        if len(full_ancestry) == len(ancestor_dbids):
            good_count += 1
            continue

        logger.error('Image has incomplete ancestry: %s, %s, %s, %s',
                     img.id, img.docker_image_id, full_ancestry,
                     ancestor_dbids)

        # Rebuild the ancestors string one ancestor at a time; each ancestor
        # is resolved (or created) with the prefix accumulated so far.
        fixed_ancestry = '/'
        for ancestor in full_ancestry:
            ancestor_img = resolve_or_create(img.repository, ancestor,
                                             fixed_ancestry)
            fixed_ancestry += str(ancestor_img.id) + '/'

        img.ancestors = fixed_ancestry
        img.save()
        bad_count += 1
    except RuntimeError:
        cant_fix.append(img)

logger.debug('Bad: %s Good: %s Can\'t Fix: %s', bad_count, good_count,
             len(cant_fix))

for unfixable in cant_fix:
    logger.error('Unable to fix %s in repo %s/%s', unfixable.id,
                 unfixable.repository.namespace, unfixable.repository.name)