From 35c35d9913aea2b12a48e991f55591a1c97a3a88 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Tue, 29 Sep 2015 17:53:39 -0400 Subject: [PATCH] Load images and storage references in bulk during V1 synthesize Currently, we perform multiple queries for each layer, making it much slower (especially cross-region) Fixes #413 --- data/model/image.py | 36 ++----- data/model/storage.py | 9 +- endpoints/v2/manifest.py | 79 +++++++++++---- test/registry_tests.py | 206 +++++++++++++++++++-------------------- 4 files changed, 173 insertions(+), 157 deletions(-) diff --git a/data/model/image.py b/data/model/image.py index 922da8280..12b6e9dd7 100644 --- a/data/model/image.py +++ b/data/model/image.py @@ -105,13 +105,10 @@ def _translate_placements_to_images_with_locations(query): return images.values() -def lookup_repository_images(namespace_name, repository_name, docker_image_ids): +def lookup_repository_images(repo, docker_image_ids): return (Image .select() - .join(Repository) - .join(Namespace, on=(Repository.namespace_user == Namespace.id)) - .where(Repository.name == repository_name, Namespace.username == namespace_name, - Image.docker_image_id << docker_image_ids)) + .where(Image.repository == repo, Image.docker_image_id << docker_image_ids)) def get_matching_repository_images(namespace_name, repository_name, docker_image_ids): @@ -415,31 +412,14 @@ def get_image_layers(image): return image_list -def synthesize_v1_image(namespace, repository_name, storage_checksum, docker_image_id, - created_date_str, comment, command, v1_json_metadata, parent_docker_id): +def synthesize_v1_image(repo, image_storage, docker_image_id, created_date_str, + comment, command, v1_json_metadata, parent_image=None): """ Find an existing image with this docker image id, and if none exists, write one with the specified metadata. - """ - - repo = _basequery.get_existing_repository(namespace, repository_name) - # Sometimes the manifest may reference an image that already exists - - found = get_image(repo, docker_image_id) - if found is not None: - # The image already exists, nothing to do - return found - - the_bits = storage.get_repo_storage_by_checksum(namespace, repository_name, storage_checksum) - + """ ancestors = '/' - if parent_docker_id is not None: - parent = get_repo_image(namespace, repository_name, parent_docker_id) - if parent is None: - msg = 'Parent not found with docker image id {0} in repo {1}/{2}'.format(parent_docker_id, - namespace, - repository_name) - raise InvalidImageException(msg) - ancestors = '{0}{1}/'.format(parent.ancestors, parent.id) + if parent_image is not None: + ancestors = '{0}{1}/'.format(parent_image.ancestors, parent_image.id) created = None if created_date_str is not None: @@ -451,4 +431,4 @@ def synthesize_v1_image(namespace, repository_name, storage_checksum, docker_ima return Image.create(docker_image_id=docker_image_id, ancestors=ancestors, comment=comment, command=command, v1_json_metadata=v1_json_metadata, created=created, - storage=the_bits, repository=repo) + storage=image_storage, repository=repo) diff --git a/data/model/storage.py b/data/model/storage.py index 4b44a2005..73880e205 100644 --- a/data/model/storage.py +++ b/data/model/storage.py @@ -232,5 +232,10 @@ def get_layer_path(storage_record): return store.blob_path(storage_record.checksum) - - +def lookup_repo_storages_by_checksum(repo, checksums): + """ Looks up repository storages (without placements) matching the given repository + and checksum. """ + return (ImageStorage + .select() + .join(Image) + .where(Image.repository == repo, ImageStorage.checksum << checksums)) diff --git a/endpoints/v2/manifest.py b/endpoints/v2/manifest.py index c369af141..fc5778b64 100644 --- a/endpoints/v2/manifest.py +++ b/endpoints/v2/manifest.py @@ -98,7 +98,7 @@ class SignedManifest(object): @property def layers(self): """ Returns a generator of objects that have the blobSum and v1Compatibility keys in them, - starting from the root image and working toward the leaf node. + starting from the leaf image and working toward the base node. """ for blob_sum_obj, history_obj in reversed(zip(self._parsed[_FS_LAYERS_KEY], self._parsed[_HISTORY_KEY])): @@ -258,42 +258,79 @@ def write_manifest_by_digest(namespace, repo_name, manifest_ref): def _write_manifest(namespace, repo_name, manifest): + # Ensure that the manifest is for this repository. if manifest.namespace != namespace or manifest.repo_name != repo_name: raise NameInvalid() + # Ensure that the repository exists. + repo = model.repository.get_repository(namespace, repo_name) + if repo is None: + raise NameInvalid() + + # Lookup all the images and their parent images (if any) inside the manifest. This will let us + # know which V1 images we need to synthesize and which ones are invalid. + layers = list(manifest.layers) + + docker_image_ids = [mdata.v1_metadata.docker_id for mdata in layers] + parent_image_ids = [mdata.v1_metadata.parent for mdata in layers + if mdata.v1_metadata.parent] + all_image_ids = list(set(docker_image_ids + parent_image_ids)) + + images_query = model.image.lookup_repository_images(repo, all_image_ids) + images_map = {image.docker_image_id: image for image in images_query} + + # Lookup the storages associated with each blob in the manifest. + checksums = [str(mdata.digest) for mdata in manifest.layers] + storage_query = model.storage.lookup_repo_storages_by_checksum(repo, checksums) + storage_map = {storage.checksum: storage for storage in storage_query} + + # Synthesize the V1 metadata for each layer. manifest_digest = manifest.digest tag_name = manifest.tag - leaf_layer = None - try: - for mdata in manifest.layers: - # Store the v1 metadata in the db - v1_mdata = mdata.v1_metadata - digest_str = str(mdata.digest) - model.image.synthesize_v1_image(namespace, repo_name, digest_str, v1_mdata.docker_id, - v1_mdata.created, v1_mdata.comment, v1_mdata.command, - mdata.v1_metadata_str, v1_mdata.parent) - leaf_layer = mdata + for mdata in layers: + digest_str = str(mdata.digest) + v1_mdata = mdata.v1_metadata - except model.InvalidImageException: - raise BlobUnknown(detail={'digest': digest_str}) + # If there is already a V1 image for this layer, nothing more to do. + if v1_mdata.docker_id in images_map: + continue - if leaf_layer is None: + # Lookup the parent image for the layer, if any. + parent_image = None + if v1_mdata.parent is not None: + parent_image = images_map.get(v1_mdata.parent) + if parent_image is None: + msg = 'Parent not found with docker image id {0}'.format(v1_mdata.parent) + raise ManifestInvalid(detail={'message': msg}) + + # Synthesize and store the v1 metadata in the db. + blob_storage = storage_map.get(digest_str) + if blob_storage is None: + raise BlobUnknown(detail={'digest': digest_str}) + + image = model.image.synthesize_v1_image(repo, blob_storage, v1_mdata.docker_id, + v1_mdata.created, v1_mdata.comment, v1_mdata.command, + mdata.v1_metadata_str, parent_image) + + images_map[v1_mdata.docker_id] = image + + if not layers: # The manifest doesn't actually reference any layers! raise ManifestInvalid(detail={'message': 'manifest does not reference any layers'}) + # Store the manifest pointing to the tag. + leaf_layer = layers[0] model.tag.store_tag_manifest(namespace, repo_name, tag_name, leaf_layer.v1_metadata.docker_id, manifest_digest, request.data) # Spawn the repo_push event. - repo = model.repository.get_repository(namespace, repo_name) - if repo is not None: - event_data = { - 'updated_tags': [tag_name], - } + event_data = { + 'updated_tags': [tag_name], + } - track_and_log('push_repo', repo) - spawn_notification(repo, 'repo_push', event_data) + track_and_log('push_repo', repo) + spawn_notification(repo, 'repo_push', event_data) response = make_response('OK', 202) response.headers['Docker-Content-Digest'] = manifest_digest diff --git a/test/registry_tests.py b/test/registry_tests.py index 36c206e7a..8b1c2c622 100644 --- a/test/registry_tests.py +++ b/test/registry_tests.py @@ -205,6 +205,9 @@ class BaseRegistryMixin(object): self.assertEquals(response.status_code, expected_code) return response + def _get_default_images(self): + return [{'id': 'someid', 'contents': 'somecontent'}] + class V1RegistryMixin(BaseRegistryMixin): def v1_ping(self): @@ -212,20 +215,21 @@ class V1RegistryMixin(BaseRegistryMixin): class V1RegistryPushMixin(V1RegistryMixin): - def do_push(self, namespace, repository, username, password, images): + def do_push(self, namespace, repository, username, password, images=None): + images = images or self._get_default_images() auth = (username, password) # Ping! self.v1_ping() # PUT /v1/repositories/{namespace}/{repository}/ - data = [{"id": image_id} for image_id, _ in images.iteritems()] self.conduct('PUT', '/v1/repositories/%s/%s' % (namespace, repository), - data=json.dumps(data), auth=auth, + data=json.dumps(images), auth=auth, expected_code=201) last_image_id = None - for image_id, _ in images.iteritems(): + for image_data in images: + image_id = image_data['id'] last_image_id = image_id # PUT /v1/images/{imageID}/json @@ -364,8 +368,10 @@ class V2RegistryMixin(BaseRegistryMixin): class V2RegistryPushMixin(V2RegistryMixin): - def do_push(self, namespace, repository, username, password, images, tag_name=None, - cancel=False, invalid=False): + def do_push(self, namespace, repository, username, password, images=None, tag_name=None, + cancel=False, invalid=False, expected_manifest_code=202): + images = images or self._get_default_images() + # Ping! self.v2_ping() @@ -375,30 +381,22 @@ class V2RegistryPushMixin(V2RegistryMixin): # Build a fake manifest. tag_name = tag_name or 'latest' builder = SignedManifestBuilder(namespace, repository, tag_name) - for image_id, contents in images.iteritems(): - if isinstance(contents, dict): - full_contents = contents['contents'] - else: - full_contents = contents - - checksum = 'sha256:' + hashlib.sha256(full_contents).hexdigest() + for image_data in images: + checksum = 'sha256:' + hashlib.sha256(image_data['contents']).hexdigest() if invalid: checksum = 'sha256:' + hashlib.sha256('foobarbaz').hexdigest() - builder.add_layer(checksum, json.dumps({'id': image_id, 'data': contents})) + builder.add_layer(checksum, json.dumps(image_data)) # Build the manifest. manifest = builder.build(_JWK) # Push the image's layers. checksums = {} - for image_id, contents in images.iteritems(): - chunks = None - if isinstance(contents, dict): - full_contents = contents['contents'] - chunks = contents['chunks'] - else: - full_contents = contents + for image_data in images: + image_id = image_data['id'] + full_contents = image_data['contents'] + chunks = image_data.get('chunks') # Layer data should not yet exist. checksum = 'sha256:' + hashlib.sha256(full_contents).hexdigest() @@ -414,7 +412,7 @@ class V2RegistryPushMixin(V2RegistryMixin): # PATCH the image data into the layer. if chunks is None: - self.conduct('PATCH', location, data=contents, expected_code=204, auth='jwt') + self.conduct('PATCH', location, data=full_contents, expected_code=204, auth='jwt') else: for chunk in chunks: if len(chunk) == 3: @@ -461,7 +459,7 @@ class V2RegistryPushMixin(V2RegistryMixin): self.assertEquals(response.headers['Content-Length'], str(len(full_contents))) # Write the manifest. - put_code = 404 if invalid else 202 + put_code = 404 if invalid else expected_manifest_code self.conduct('PUT', '/v2/%s/%s/manifests/%s' % (namespace, repository, tag_name), data=manifest.bytes, expected_code=put_code, headers={'Content-Type': 'application/json'}, auth='jwt') @@ -508,11 +506,7 @@ class V2RegistryPullMixin(V2RegistryMixin): class RegistryTestsMixin(object): def test_pull_publicrepo_anonymous(self): # Add a new repository under the public user, so we have a real repository to pull. - images = { - 'someid': 'onlyimagehere' - } - - self.do_push('public', 'newrepo', 'public', 'password', images) + self.do_push('public', 'newrepo', 'public', 'password') self.clearSession() # First try to pull the (currently private) repo anonymously, which should fail (since it is @@ -530,11 +524,7 @@ class RegistryTestsMixin(object): def test_pull_publicrepo_devtable(self): # Add a new repository under the public user, so we have a real repository to pull. - images = { - 'someid': 'onlyimagehere' - } - - self.do_push('public', 'newrepo', 'public', 'password', images) + self.do_push('public', 'newrepo', 'public', 'password') self.clearSession() # First try to pull the (currently private) repo as devtable, which should fail as it belongs @@ -552,11 +542,7 @@ class RegistryTestsMixin(object): def test_pull_private_repo(self): # Add a new repository under the devtable user, so we have a real repository to pull. - images = { - 'someid': 'onlyimagehere' - } - - self.do_push('devtable', 'newrepo', 'devtable', 'password', images) + self.do_push('devtable', 'newrepo', 'devtable', 'password') self.clearSession() # First try to pull the (currently private) repo as public, which should fail as it belongs @@ -572,11 +558,7 @@ class RegistryTestsMixin(object): # Turn off anonymous access. with TestFeature(self, 'ANONYMOUS_ACCESS', False): # Add a new repository under the public user, so we have a real repository to pull. - images = { - 'someid': 'onlyimagehere' - } - - self.do_push('public', 'newrepo', 'public', 'password', images) + self.do_push('public', 'newrepo', 'public', 'password') self.clearSession() # First try to pull the (currently private) repo as devtable, which should fail as it belongs @@ -596,11 +578,7 @@ class RegistryTestsMixin(object): # Turn off anonymous access. with TestFeature(self, 'ANONYMOUS_ACCESS', False): # Add a new repository under the public user, so we have a real repository to pull. - images = { - 'someid': 'onlyimagehere' - } - - self.do_push('public', 'newrepo', 'public', 'password', images) + self.do_push('public', 'newrepo', 'public', 'password') self.clearSession() # First try to pull the (currently private) repo as devtable, which should fail as it belongs @@ -615,11 +593,7 @@ class RegistryTestsMixin(object): # Turn off anonymous access. with TestFeature(self, 'ANONYMOUS_ACCESS', False): # Add a new repository under the public user, so we have a real repository to pull. - images = { - 'someid': 'onlyimagehere' - } - - self.do_push('public', 'newrepo', 'public', 'password', images) + self.do_push('public', 'newrepo', 'public', 'password') self.clearSession() # First try to pull the (currently private) repo as anonymous, which should fail as it @@ -643,11 +617,7 @@ class RegistryTestsMixin(object): def test_create_repo_creator_user(self): - images = { - 'someid': 'onlyimagehere' - } - - self.do_push('buynlarge', 'newrepo', 'creator', 'password', images) + self.do_push('buynlarge', 'newrepo', 'creator', 'password') # Pull the repository as devtable, which should succeed because the repository is owned by the # org. @@ -660,11 +630,7 @@ class RegistryTestsMixin(object): resp = self.conduct('GET', '/api/v1/organization/buynlarge/robots/ownerbot') robot_token = json.loads(resp.text)['token'] - images = { - 'someid': 'onlyimagehere' - } - - self.do_push('buynlarge', 'newrepo', 'buynlarge+ownerbot', robot_token, images) + self.do_push('buynlarge', 'newrepo', 'buynlarge+ownerbot', robot_token) # Pull the repository as devtable, which should succeed because the repository is owned by the # org. @@ -677,11 +643,7 @@ class RegistryTestsMixin(object): resp = self.conduct('GET', '/api/v1/organization/buynlarge/robots/creatorbot') robot_token = json.loads(resp.text)['token'] - images = { - 'someid': 'onlyimagehere' - } - - self.do_push('buynlarge', 'newrepo', 'buynlarge+creatorbot', robot_token, images) + self.do_push('buynlarge', 'newrepo', 'buynlarge+creatorbot', robot_token) # Pull the repository as devtable, which should succeed because the repository is owned by the # org. @@ -697,27 +659,15 @@ class V2RegistryTests(V2RegistryPullMixin, V2RegistryPushMixin, RegistryTestsMix """ Tests for V2 registry. """ def test_invalid_push(self): - images = { - 'someid': 'onlyimagehere' - } - - self.do_push('devtable', 'newrepo', 'devtable', 'password', images, invalid=True) + self.do_push('devtable', 'newrepo', 'devtable', 'password', invalid=True) def test_cancel_push(self): - images = { - 'someid': 'onlyimagehere' - } - - self.do_push('devtable', 'newrepo', 'devtable', 'password', images, cancel=True) + self.do_push('devtable', 'newrepo', 'devtable', 'password', cancel=True) def test_pull_by_checksum(self): # Add a new repository under the user, so we have a real repository to pull. - images = { - 'someid': 'onlyimagehere' - } - - _, digest = self.do_push('devtable', 'newrepo', 'devtable', 'password', images) + _, digest = self.do_push('devtable', 'newrepo', 'devtable', 'password') # Attempt to pull by digest. self.do_pull('devtable', 'newrepo', 'devtable', 'password', manifest_id=digest) @@ -725,11 +675,7 @@ class V2RegistryTests(V2RegistryPullMixin, V2RegistryPushMixin, RegistryTestsMix def test_pull_invalid_image_tag(self): # Add a new repository under the user, so we have a real repository to pull. - images = { - 'someid': 'onlyimagehere' - } - - self.do_push('devtable', 'newrepo', 'devtable', 'password', images) + self.do_push('devtable', 'newrepo', 'devtable', 'password') self.clearSession() # Attempt to pull the invalid tag. @@ -745,15 +691,16 @@ class V2RegistryTests(V2RegistryPullMixin, V2RegistryPushMixin, RegistryTestsMix chunk_count = int(math.ceil((len(contents) * 1.0) / chunksize)) chunks = [(index * chunksize, (index + 1)*chunksize) for index in range(chunk_count)] - images = { - 'someid': { + images = [ + { + 'id':'someid', 'contents': contents, 'chunks': chunks } - } + ] # Push the chunked upload. - self.do_push('devtable', 'newrepo', 'devtable', 'password', images) + self.do_push('devtable', 'newrepo', 'devtable', 'password', images=images) # Pull the image back and verify the contents. blobs = self.do_pull('devtable', 'newrepo', 'devtable', 'password') @@ -765,15 +712,16 @@ class V2RegistryTests(V2RegistryPullMixin, V2RegistryPushMixin, RegistryTestsMix contents = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(size)) chunks = [(0, 100), (100, size)] - images = { - 'someid': { + images = [ + { + 'id':'someid', 'contents': contents, 'chunks': chunks } - } + ] # Push the chunked upload. - self.do_push('devtable', 'newrepo', 'devtable', 'password', images) + self.do_push('devtable', 'newrepo', 'devtable', 'password', images=images) # Pull the image back and verify the contents. blobs = self.do_pull('devtable', 'newrepo', 'devtable', 'password') @@ -786,15 +734,16 @@ class V2RegistryTests(V2RegistryPullMixin, V2RegistryPushMixin, RegistryTestsMix chunks = [(0, 100), (10, size)] - images = { - 'someid': { + images = [ + { + 'id':'someid', 'contents': contents, 'chunks': chunks } - } + ] # Push the chunked upload. - self.do_push('devtable', 'newrepo', 'devtable', 'password', images) + self.do_push('devtable', 'newrepo', 'devtable', 'password', images=images) # Pull the image back and verify the contents. blobs = self.do_pull('devtable', 'newrepo', 'devtable', 'password') @@ -807,22 +756,66 @@ class V2RegistryTests(V2RegistryPullMixin, V2RegistryPushMixin, RegistryTestsMix chunks = [(0, 100), (101, size, 416)] - images = { - 'someid': { + images = [ + { + 'id':'someid', 'contents': contents, 'chunks': chunks } - } + ] # Attempt to push the chunked upload, which should fail. - self.do_push('devtable', 'newrepo', 'devtable', 'password', images) + self.do_push('devtable', 'newrepo', 'devtable', 'password', images=images) + + def test_multiple_layers_invalid(self): + # Attempt to push a manifest with an image depending on an unknown base layer. + images = [ + { + 'id': 'latestid', + 'contents': 'the latest image', + 'parent': 'baseid', + } + ] + + self.do_push('devtable', 'newrepo', 'devtable', 'password', images=images, + expected_manifest_code=400) + + def test_multiple_layers(self): + # Push a manifest with multiple layers. + images = [ + { + 'id': 'latestid', + 'contents': 'the latest image', + 'parent': 'baseid', + }, + { + 'id': 'baseid', + 'contents': 'The base image', + } + ] + + self.do_push('devtable', 'newrepo', 'devtable', 'password', images=images) def test_multiple_tags(self): + latest_images = [ + { + 'id': 'latestid', + 'contents': 'the latest image' + } + ] + + foobar_images = [ + { + 'id': 'foobarid', + 'contents': 'the foobar image', + } + ] + # Create the repo. - self.do_push('devtable', 'newrepo', 'devtable', 'password', {'someid': 'onlyimagehere'}, + self.do_push('devtable', 'newrepo', 'devtable', 'password', images=latest_images, tag_name='latest') - self.do_push('devtable', 'newrepo', 'devtable', 'password', {'anotherid': 'anotherimage'}, + self.do_push('devtable', 'newrepo', 'devtable', 'password', images=foobar_images, tag_name='foobar') # Retrieve the tags. @@ -846,6 +839,7 @@ class V2RegistryTests(V2RegistryPullMixin, V2RegistryPushMixin, RegistryTestsMix self.conduct('GET', '/v2/devtable/doesnotexist/tags/list', auth='jwt', expected_code=401) + class V1PushV2PullRegistryTests(V2RegistryPullMixin, V1RegistryPushMixin, RegistryTestsMixin, RegistryTestCaseMixin, LiveServerTestCase): """ Tests for V1 push, V2 pull registry. """