Load images and storage references in bulk during V1 synthesize

Currently, we perform multiple queries for each layer, making it much slower (especially cross-region)

Fixes #413
This commit is contained in:
Joseph Schorr 2015-09-29 17:53:39 -04:00
parent 78e8aefd45
commit 35c35d9913
4 changed files with 173 additions and 157 deletions

View file

@ -105,13 +105,10 @@ def _translate_placements_to_images_with_locations(query):
return images.values()
def lookup_repository_images(namespace_name, repository_name, docker_image_ids):
def lookup_repository_images(repo, docker_image_ids):
return (Image
.select()
.join(Repository)
.join(Namespace, on=(Repository.namespace_user == Namespace.id))
.where(Repository.name == repository_name, Namespace.username == namespace_name,
Image.docker_image_id << docker_image_ids))
.where(Image.repository == repo, Image.docker_image_id << docker_image_ids))
def get_matching_repository_images(namespace_name, repository_name, docker_image_ids):
@ -415,31 +412,14 @@ def get_image_layers(image):
return image_list
def synthesize_v1_image(namespace, repository_name, storage_checksum, docker_image_id,
created_date_str, comment, command, v1_json_metadata, parent_docker_id):
def synthesize_v1_image(repo, image_storage, docker_image_id, created_date_str,
comment, command, v1_json_metadata, parent_image=None):
""" Find an existing image with this docker image id, and if none exists, write one with the
specified metadata.
"""
repo = _basequery.get_existing_repository(namespace, repository_name)
# Sometimes the manifest may reference an image that already exists
found = get_image(repo, docker_image_id)
if found is not None:
# The image already exists, nothing to do
return found
the_bits = storage.get_repo_storage_by_checksum(namespace, repository_name, storage_checksum)
ancestors = '/'
if parent_docker_id is not None:
parent = get_repo_image(namespace, repository_name, parent_docker_id)
if parent is None:
msg = 'Parent not found with docker image id {0} in repo {1}/{2}'.format(parent_docker_id,
namespace,
repository_name)
raise InvalidImageException(msg)
ancestors = '{0}{1}/'.format(parent.ancestors, parent.id)
if parent_image is not None:
ancestors = '{0}{1}/'.format(parent_image.ancestors, parent_image.id)
created = None
if created_date_str is not None:
@ -451,4 +431,4 @@ def synthesize_v1_image(namespace, repository_name, storage_checksum, docker_ima
return Image.create(docker_image_id=docker_image_id, ancestors=ancestors, comment=comment,
command=command, v1_json_metadata=v1_json_metadata, created=created,
storage=the_bits, repository=repo)
storage=image_storage, repository=repo)

View file

@ -232,5 +232,10 @@ def get_layer_path(storage_record):
return store.blob_path(storage_record.checksum)
def lookup_repo_storages_by_checksum(repo, checksums):
""" Looks up repository storages (without placements) matching the given repository
and checksum. """
return (ImageStorage
.select()
.join(Image)
.where(Image.repository == repo, ImageStorage.checksum << checksums))

View file

@ -98,7 +98,7 @@ class SignedManifest(object):
@property
def layers(self):
""" Returns a generator of objects that have the blobSum and v1Compatibility keys in them,
starting from the root image and working toward the leaf node.
starting from the leaf image and working toward the base node.
"""
for blob_sum_obj, history_obj in reversed(zip(self._parsed[_FS_LAYERS_KEY],
self._parsed[_HISTORY_KEY])):
@ -258,36 +258,73 @@ def write_manifest_by_digest(namespace, repo_name, manifest_ref):
def _write_manifest(namespace, repo_name, manifest):
# Ensure that the manifest is for this repository.
if manifest.namespace != namespace or manifest.repo_name != repo_name:
raise NameInvalid()
# Ensure that the repository exists.
repo = model.repository.get_repository(namespace, repo_name)
if repo is None:
raise NameInvalid()
# Lookup all the images and their parent images (if any) inside the manifest. This will let us
# know which V1 images we need to synthesize and which ones are invalid.
layers = list(manifest.layers)
docker_image_ids = [mdata.v1_metadata.docker_id for mdata in layers]
parent_image_ids = [mdata.v1_metadata.parent for mdata in layers
if mdata.v1_metadata.parent]
all_image_ids = list(set(docker_image_ids + parent_image_ids))
images_query = model.image.lookup_repository_images(repo, all_image_ids)
images_map = {image.docker_image_id: image for image in images_query}
# Lookup the storages associated with each blob in the manifest.
checksums = [str(mdata.digest) for mdata in manifest.layers]
storage_query = model.storage.lookup_repo_storages_by_checksum(repo, checksums)
storage_map = {storage.checksum: storage for storage in storage_query}
# Synthesize the V1 metadata for each layer.
manifest_digest = manifest.digest
tag_name = manifest.tag
leaf_layer = None
try:
for mdata in manifest.layers:
# Store the v1 metadata in the db
v1_mdata = mdata.v1_metadata
for mdata in layers:
digest_str = str(mdata.digest)
model.image.synthesize_v1_image(namespace, repo_name, digest_str, v1_mdata.docker_id,
v1_mdata.created, v1_mdata.comment, v1_mdata.command,
mdata.v1_metadata_str, v1_mdata.parent)
leaf_layer = mdata
v1_mdata = mdata.v1_metadata
except model.InvalidImageException:
# If there is already a V1 image for this layer, nothing more to do.
if v1_mdata.docker_id in images_map:
continue
# Lookup the parent image for the layer, if any.
parent_image = None
if v1_mdata.parent is not None:
parent_image = images_map.get(v1_mdata.parent)
if parent_image is None:
msg = 'Parent not found with docker image id {0}'.format(v1_mdata.parent)
raise ManifestInvalid(detail={'message': msg})
# Synthesize and store the v1 metadata in the db.
blob_storage = storage_map.get(digest_str)
if blob_storage is None:
raise BlobUnknown(detail={'digest': digest_str})
if leaf_layer is None:
image = model.image.synthesize_v1_image(repo, blob_storage, v1_mdata.docker_id,
v1_mdata.created, v1_mdata.comment, v1_mdata.command,
mdata.v1_metadata_str, parent_image)
images_map[v1_mdata.docker_id] = image
if not layers:
# The manifest doesn't actually reference any layers!
raise ManifestInvalid(detail={'message': 'manifest does not reference any layers'})
# Store the manifest pointing to the tag.
leaf_layer = layers[0]
model.tag.store_tag_manifest(namespace, repo_name, tag_name, leaf_layer.v1_metadata.docker_id,
manifest_digest, request.data)
# Spawn the repo_push event.
repo = model.repository.get_repository(namespace, repo_name)
if repo is not None:
event_data = {
'updated_tags': [tag_name],
}

View file

@ -205,6 +205,9 @@ class BaseRegistryMixin(object):
self.assertEquals(response.status_code, expected_code)
return response
def _get_default_images(self):
return [{'id': 'someid', 'contents': 'somecontent'}]
class V1RegistryMixin(BaseRegistryMixin):
def v1_ping(self):
@ -212,20 +215,21 @@ class V1RegistryMixin(BaseRegistryMixin):
class V1RegistryPushMixin(V1RegistryMixin):
def do_push(self, namespace, repository, username, password, images):
def do_push(self, namespace, repository, username, password, images=None):
images = images or self._get_default_images()
auth = (username, password)
# Ping!
self.v1_ping()
# PUT /v1/repositories/{namespace}/{repository}/
data = [{"id": image_id} for image_id, _ in images.iteritems()]
self.conduct('PUT', '/v1/repositories/%s/%s' % (namespace, repository),
data=json.dumps(data), auth=auth,
data=json.dumps(images), auth=auth,
expected_code=201)
last_image_id = None
for image_id, _ in images.iteritems():
for image_data in images:
image_id = image_data['id']
last_image_id = image_id
# PUT /v1/images/{imageID}/json
@ -364,8 +368,10 @@ class V2RegistryMixin(BaseRegistryMixin):
class V2RegistryPushMixin(V2RegistryMixin):
def do_push(self, namespace, repository, username, password, images, tag_name=None,
cancel=False, invalid=False):
def do_push(self, namespace, repository, username, password, images=None, tag_name=None,
cancel=False, invalid=False, expected_manifest_code=202):
images = images or self._get_default_images()
# Ping!
self.v2_ping()
@ -375,30 +381,22 @@ class V2RegistryPushMixin(V2RegistryMixin):
# Build a fake manifest.
tag_name = tag_name or 'latest'
builder = SignedManifestBuilder(namespace, repository, tag_name)
for image_id, contents in images.iteritems():
if isinstance(contents, dict):
full_contents = contents['contents']
else:
full_contents = contents
checksum = 'sha256:' + hashlib.sha256(full_contents).hexdigest()
for image_data in images:
checksum = 'sha256:' + hashlib.sha256(image_data['contents']).hexdigest()
if invalid:
checksum = 'sha256:' + hashlib.sha256('foobarbaz').hexdigest()
builder.add_layer(checksum, json.dumps({'id': image_id, 'data': contents}))
builder.add_layer(checksum, json.dumps(image_data))
# Build the manifest.
manifest = builder.build(_JWK)
# Push the image's layers.
checksums = {}
for image_id, contents in images.iteritems():
chunks = None
if isinstance(contents, dict):
full_contents = contents['contents']
chunks = contents['chunks']
else:
full_contents = contents
for image_data in images:
image_id = image_data['id']
full_contents = image_data['contents']
chunks = image_data.get('chunks')
# Layer data should not yet exist.
checksum = 'sha256:' + hashlib.sha256(full_contents).hexdigest()
@ -414,7 +412,7 @@ class V2RegistryPushMixin(V2RegistryMixin):
# PATCH the image data into the layer.
if chunks is None:
self.conduct('PATCH', location, data=contents, expected_code=204, auth='jwt')
self.conduct('PATCH', location, data=full_contents, expected_code=204, auth='jwt')
else:
for chunk in chunks:
if len(chunk) == 3:
@ -461,7 +459,7 @@ class V2RegistryPushMixin(V2RegistryMixin):
self.assertEquals(response.headers['Content-Length'], str(len(full_contents)))
# Write the manifest.
put_code = 404 if invalid else 202
put_code = 404 if invalid else expected_manifest_code
self.conduct('PUT', '/v2/%s/%s/manifests/%s' % (namespace, repository, tag_name),
data=manifest.bytes, expected_code=put_code,
headers={'Content-Type': 'application/json'}, auth='jwt')
@ -508,11 +506,7 @@ class V2RegistryPullMixin(V2RegistryMixin):
class RegistryTestsMixin(object):
def test_pull_publicrepo_anonymous(self):
# Add a new repository under the public user, so we have a real repository to pull.
images = {
'someid': 'onlyimagehere'
}
self.do_push('public', 'newrepo', 'public', 'password', images)
self.do_push('public', 'newrepo', 'public', 'password')
self.clearSession()
# First try to pull the (currently private) repo anonymously, which should fail (since it is
@ -530,11 +524,7 @@ class RegistryTestsMixin(object):
def test_pull_publicrepo_devtable(self):
# Add a new repository under the public user, so we have a real repository to pull.
images = {
'someid': 'onlyimagehere'
}
self.do_push('public', 'newrepo', 'public', 'password', images)
self.do_push('public', 'newrepo', 'public', 'password')
self.clearSession()
# First try to pull the (currently private) repo as devtable, which should fail as it belongs
@ -552,11 +542,7 @@ class RegistryTestsMixin(object):
def test_pull_private_repo(self):
# Add a new repository under the devtable user, so we have a real repository to pull.
images = {
'someid': 'onlyimagehere'
}
self.do_push('devtable', 'newrepo', 'devtable', 'password', images)
self.do_push('devtable', 'newrepo', 'devtable', 'password')
self.clearSession()
# First try to pull the (currently private) repo as public, which should fail as it belongs
@ -572,11 +558,7 @@ class RegistryTestsMixin(object):
# Turn off anonymous access.
with TestFeature(self, 'ANONYMOUS_ACCESS', False):
# Add a new repository under the public user, so we have a real repository to pull.
images = {
'someid': 'onlyimagehere'
}
self.do_push('public', 'newrepo', 'public', 'password', images)
self.do_push('public', 'newrepo', 'public', 'password')
self.clearSession()
# First try to pull the (currently private) repo as devtable, which should fail as it belongs
@ -596,11 +578,7 @@ class RegistryTestsMixin(object):
# Turn off anonymous access.
with TestFeature(self, 'ANONYMOUS_ACCESS', False):
# Add a new repository under the public user, so we have a real repository to pull.
images = {
'someid': 'onlyimagehere'
}
self.do_push('public', 'newrepo', 'public', 'password', images)
self.do_push('public', 'newrepo', 'public', 'password')
self.clearSession()
# First try to pull the (currently private) repo as devtable, which should fail as it belongs
@ -615,11 +593,7 @@ class RegistryTestsMixin(object):
# Turn off anonymous access.
with TestFeature(self, 'ANONYMOUS_ACCESS', False):
# Add a new repository under the public user, so we have a real repository to pull.
images = {
'someid': 'onlyimagehere'
}
self.do_push('public', 'newrepo', 'public', 'password', images)
self.do_push('public', 'newrepo', 'public', 'password')
self.clearSession()
# First try to pull the (currently private) repo as anonymous, which should fail as it
@ -643,11 +617,7 @@ class RegistryTestsMixin(object):
def test_create_repo_creator_user(self):
images = {
'someid': 'onlyimagehere'
}
self.do_push('buynlarge', 'newrepo', 'creator', 'password', images)
self.do_push('buynlarge', 'newrepo', 'creator', 'password')
# Pull the repository as devtable, which should succeed because the repository is owned by the
# org.
@ -660,11 +630,7 @@ class RegistryTestsMixin(object):
resp = self.conduct('GET', '/api/v1/organization/buynlarge/robots/ownerbot')
robot_token = json.loads(resp.text)['token']
images = {
'someid': 'onlyimagehere'
}
self.do_push('buynlarge', 'newrepo', 'buynlarge+ownerbot', robot_token, images)
self.do_push('buynlarge', 'newrepo', 'buynlarge+ownerbot', robot_token)
# Pull the repository as devtable, which should succeed because the repository is owned by the
# org.
@ -677,11 +643,7 @@ class RegistryTestsMixin(object):
resp = self.conduct('GET', '/api/v1/organization/buynlarge/robots/creatorbot')
robot_token = json.loads(resp.text)['token']
images = {
'someid': 'onlyimagehere'
}
self.do_push('buynlarge', 'newrepo', 'buynlarge+creatorbot', robot_token, images)
self.do_push('buynlarge', 'newrepo', 'buynlarge+creatorbot', robot_token)
# Pull the repository as devtable, which should succeed because the repository is owned by the
# org.
@ -697,27 +659,15 @@ class V2RegistryTests(V2RegistryPullMixin, V2RegistryPushMixin, RegistryTestsMix
""" Tests for V2 registry. """
def test_invalid_push(self):
images = {
'someid': 'onlyimagehere'
}
self.do_push('devtable', 'newrepo', 'devtable', 'password', images, invalid=True)
self.do_push('devtable', 'newrepo', 'devtable', 'password', invalid=True)
def test_cancel_push(self):
images = {
'someid': 'onlyimagehere'
}
self.do_push('devtable', 'newrepo', 'devtable', 'password', images, cancel=True)
self.do_push('devtable', 'newrepo', 'devtable', 'password', cancel=True)
def test_pull_by_checksum(self):
# Add a new repository under the user, so we have a real repository to pull.
images = {
'someid': 'onlyimagehere'
}
_, digest = self.do_push('devtable', 'newrepo', 'devtable', 'password', images)
_, digest = self.do_push('devtable', 'newrepo', 'devtable', 'password')
# Attempt to pull by digest.
self.do_pull('devtable', 'newrepo', 'devtable', 'password', manifest_id=digest)
@ -725,11 +675,7 @@ class V2RegistryTests(V2RegistryPullMixin, V2RegistryPushMixin, RegistryTestsMix
def test_pull_invalid_image_tag(self):
# Add a new repository under the user, so we have a real repository to pull.
images = {
'someid': 'onlyimagehere'
}
self.do_push('devtable', 'newrepo', 'devtable', 'password', images)
self.do_push('devtable', 'newrepo', 'devtable', 'password')
self.clearSession()
# Attempt to pull the invalid tag.
@ -745,15 +691,16 @@ class V2RegistryTests(V2RegistryPullMixin, V2RegistryPushMixin, RegistryTestsMix
chunk_count = int(math.ceil((len(contents) * 1.0) / chunksize))
chunks = [(index * chunksize, (index + 1)*chunksize) for index in range(chunk_count)]
images = {
'someid': {
images = [
{
'id':'someid',
'contents': contents,
'chunks': chunks
}
}
]
# Push the chunked upload.
self.do_push('devtable', 'newrepo', 'devtable', 'password', images)
self.do_push('devtable', 'newrepo', 'devtable', 'password', images=images)
# Pull the image back and verify the contents.
blobs = self.do_pull('devtable', 'newrepo', 'devtable', 'password')
@ -765,15 +712,16 @@ class V2RegistryTests(V2RegistryPullMixin, V2RegistryPushMixin, RegistryTestsMix
contents = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(size))
chunks = [(0, 100), (100, size)]
images = {
'someid': {
images = [
{
'id':'someid',
'contents': contents,
'chunks': chunks
}
}
]
# Push the chunked upload.
self.do_push('devtable', 'newrepo', 'devtable', 'password', images)
self.do_push('devtable', 'newrepo', 'devtable', 'password', images=images)
# Pull the image back and verify the contents.
blobs = self.do_pull('devtable', 'newrepo', 'devtable', 'password')
@ -786,15 +734,16 @@ class V2RegistryTests(V2RegistryPullMixin, V2RegistryPushMixin, RegistryTestsMix
chunks = [(0, 100), (10, size)]
images = {
'someid': {
images = [
{
'id':'someid',
'contents': contents,
'chunks': chunks
}
}
]
# Push the chunked upload.
self.do_push('devtable', 'newrepo', 'devtable', 'password', images)
self.do_push('devtable', 'newrepo', 'devtable', 'password', images=images)
# Pull the image back and verify the contents.
blobs = self.do_pull('devtable', 'newrepo', 'devtable', 'password')
@ -807,22 +756,66 @@ class V2RegistryTests(V2RegistryPullMixin, V2RegistryPushMixin, RegistryTestsMix
chunks = [(0, 100), (101, size, 416)]
images = {
'someid': {
images = [
{
'id':'someid',
'contents': contents,
'chunks': chunks
}
}
]
# Attempt to push the chunked upload, which should fail.
self.do_push('devtable', 'newrepo', 'devtable', 'password', images)
self.do_push('devtable', 'newrepo', 'devtable', 'password', images=images)
def test_multiple_layers_invalid(self):
# Attempt to push a manifest with an image depending on an unknown base layer.
images = [
{
'id': 'latestid',
'contents': 'the latest image',
'parent': 'baseid',
}
]
self.do_push('devtable', 'newrepo', 'devtable', 'password', images=images,
expected_manifest_code=400)
def test_multiple_layers(self):
# Push a manifest with multiple layers.
images = [
{
'id': 'latestid',
'contents': 'the latest image',
'parent': 'baseid',
},
{
'id': 'baseid',
'contents': 'The base image',
}
]
self.do_push('devtable', 'newrepo', 'devtable', 'password', images=images)
def test_multiple_tags(self):
latest_images = [
{
'id': 'latestid',
'contents': 'the latest image'
}
]
foobar_images = [
{
'id': 'foobarid',
'contents': 'the foobar image',
}
]
# Create the repo.
self.do_push('devtable', 'newrepo', 'devtable', 'password', {'someid': 'onlyimagehere'},
self.do_push('devtable', 'newrepo', 'devtable', 'password', images=latest_images,
tag_name='latest')
self.do_push('devtable', 'newrepo', 'devtable', 'password', {'anotherid': 'anotherimage'},
self.do_push('devtable', 'newrepo', 'devtable', 'password', images=foobar_images,
tag_name='foobar')
# Retrieve the tags.
@ -846,6 +839,7 @@ class V2RegistryTests(V2RegistryPullMixin, V2RegistryPushMixin, RegistryTestsMix
self.conduct('GET', '/v2/devtable/doesnotexist/tags/list', auth='jwt', expected_code=401)
class V1PushV2PullRegistryTests(V2RegistryPullMixin, V1RegistryPushMixin, RegistryTestsMixin,
RegistryTestCaseMixin, LiveServerTestCase):
""" Tests for V1 push, V2 pull registry. """