From f8dd8b2494ff65d573a536b67b2427e200126666 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Tue, 5 Feb 2019 18:03:58 -0500 Subject: [PATCH 1/6] Have tag backfill worker continue processing ranges that have errors --- workers/tagbackfillworker.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/workers/tagbackfillworker.py b/workers/tagbackfillworker.py index fa5f5b8d0..9e6671183 100644 --- a/workers/tagbackfillworker.py +++ b/workers/tagbackfillworker.py @@ -156,7 +156,7 @@ class TagBackfillWorker(Worker): for candidate, abt, _ in iterator: if not backfill_tag(candidate): - logger.info('Another worker pre-empted us for label: %s', candidate.id) + logger.info('Another worker pre-empted us for tag: %s', candidate.id) abt.set() @@ -171,14 +171,14 @@ def lookup_map_row(repositorytag): def backfill_tag(repositorytag): logger.info('Backfilling tag %s', repositorytag.id) - # Ensure that a mapping row doesn't already exist. If it does, we've been preempted. + # Ensure that a mapping row doesn't already exist. If it does, nothing more to do. if lookup_map_row(repositorytag): return False # Grab the manifest for the RepositoryTag, backfilling as necessary. manifest_id = _get_manifest_id(repositorytag) if manifest_id is None: - return False + return True lifetime_start_ms = (repositorytag.lifetime_start_ts * 1000 if repositorytag.lifetime_start_ts is not None else None) From 82897a2bba3fc8725c48f225a45e885bb5a24afc Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Tue, 5 Feb 2019 18:04:11 -0500 Subject: [PATCH 2/6] Add a log for an integrity error in writing a manifest --- data/model/tag.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/data/model/tag.py b/data/model/tag.py index 345a0a391..e22156dbf 100644 --- a/data/model/tag.py +++ b/data/model/tag.py @@ -799,7 +799,8 @@ def populate_manifest(repository, manifest, legacy_image, storage_ids): manifest_row = Manifest.create(digest=manifest.digest, repository=repository, manifest_bytes=manifest.bytes.as_encoded_str(), media_type=media_type) - except IntegrityError: + except IntegrityError as ie: + logger.debug('Got integrity error when trying to write manifest: %s', ie) return Manifest.get(repository=repository, digest=manifest.digest) ManifestLegacyImage.create(manifest=manifest_row, repository=repository, image=legacy_image) From 097311f36048c6dfecfbb2597c9bb43f037b42f5 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Tue, 5 Feb 2019 18:06:03 -0500 Subject: [PATCH 3/6] Handle additional error cases in the manifest schema 1 implementation --- image/docker/schema1.py | 8 ++++++-- image/docker/test/test_schema1.py | 16 ++++++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/image/docker/schema1.py b/image/docker/schema1.py index 6f8660e61..337c9547d 100644 --- a/image/docker/schema1.py +++ b/image/docker/schema1.py @@ -377,7 +377,11 @@ class DockerSchema1Manifest(ManifestInterface): metadata_string = history_obj[DOCKER_SCHEMA1_V1_COMPAT_KEY] - v1_metadata = json.loads(metadata_string) + try: + v1_metadata = json.loads(metadata_string) + except (ValueError, TypeError): + raise MalformedSchema1Manifest('Could not parse metadata string: %s' % metadata_string) + command_list = v1_metadata.get('container_config', {}).get('Cmd', None) command = to_canonical_json(command_list) if command_list else None @@ -536,7 +540,7 @@ class DockerSchema1ManifestBuilder(object): DOCKER_SCHEMA1_BLOB_SUM_KEY: layer_digest, }) self._history.append({ - DOCKER_SCHEMA1_V1_COMPAT_KEY: v1_json_metadata, + DOCKER_SCHEMA1_V1_COMPAT_KEY: v1_json_metadata or '{}', }) return self diff --git a/image/docker/test/test_schema1.py b/image/docker/test/test_schema1.py index 4392ae8cb..0fd473b6a 100644 --- a/image/docker/test/test_schema1.py +++ b/image/docker/test/test_schema1.py @@ -197,3 +197,19 @@ def test_validate_manifest_with_emoji(with_key): # Ensure the manifest can be reloaded. built_bytes = built.bytes.as_encoded_str() DockerSchema1Manifest(Bytes.for_string_or_unicode(built_bytes)) + + +@pytest.mark.parametrize('with_key', [ + None, + docker_v2_signing_key, +]) +def test_validate_manifest_with_none_metadata_layer(with_key): + builder = DockerSchema1ManifestBuilder('somenamespace', 'somerepo', 'sometag') + builder.add_layer('sha256:abcde', None) + + built = builder.build(with_key, ensure_ascii=False) + built._validate() + + # Ensure the manifest can be reloaded. + built_bytes = built.bytes.as_encoded_str() + DockerSchema1Manifest(Bytes.for_string_or_unicode(built_bytes)) From 606b08906c2838b3cd6a66fc43bd6c28f731c2ec Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Wed, 6 Feb 2019 11:40:33 -0500 Subject: [PATCH 4/6] Fix typo --- util/migrate/allocator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/migrate/allocator.py b/util/migrate/allocator.py index d37abb6e9..44f4b59e4 100644 --- a/util/migrate/allocator.py +++ b/util/migrate/allocator.py @@ -134,7 +134,7 @@ class CompletedKeys(object): def yield_random_entries(batch_query, primary_key_field, batch_size, max_id, min_id=0): """ This method will yield items from random blocks in the database. We will track metadata about which keys are available for work, and we will complete the backfill when there is no - more work to be done. The method yields tupes of (candidate, Event), and if the work was + more work to be done. The method yields tuples of (candidate, Event), and if the work was already done by another worker, the caller should set the event. Batch candidates must have an "id" field which can be inspected. """ From bbaee0d3f26388866376a3a8aa4e730fd501b641 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Wed, 6 Feb 2019 17:34:18 -0500 Subject: [PATCH 5/6] Add additional logging for missing parent images --- data/model/image.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/data/model/image.py b/data/model/image.py index 972067514..1615dd44d 100644 --- a/data/model/image.py +++ b/data/model/image.py @@ -59,7 +59,8 @@ def get_parent_images(namespace_name, repository_name, image_obj): id_to_image = {unicode(image.id): image for image in parents} try: return [id_to_image[parent_id] for parent_id in reversed(parent_db_ids)] - except KeyError: + except KeyError as ke: + logger.exception('Could not find an expected parent image for image %s', image_obj.id) raise DataModelException('Unknown parent image') From 0bfbccdd445355c9127bc1482a6a1f80d1f7a654 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Thu, 7 Feb 2019 13:36:47 -0500 Subject: [PATCH 6/6] Add a handler for broken tags in the tag backfill system This will generate a tag pointing to an empty manifest; the tag will be broken, but as it is *already* broken, at least the backfill can complete --- data/registry_model/registry_pre_oci_model.py | 3 ++ data/registry_model/shared.py | 6 +++- workers/tagbackfillworker.py | 20 ++++++++----- workers/test/test_tagbackfillworker.py | 30 ++++++++++++++++++- 4 files changed, 49 insertions(+), 10 deletions(-) diff --git a/data/registry_model/registry_pre_oci_model.py b/data/registry_model/registry_pre_oci_model.py index 1a5094ae6..62cd1d31e 100644 --- a/data/registry_model/registry_pre_oci_model.py +++ b/data/registry_model/registry_pre_oci_model.py @@ -478,6 +478,9 @@ class PreOCIModel(SharedModel, RegistryDataInterface): # Write the manifest to the DB. manifest = self._build_manifest_for_legacy_image(tag_obj.name, tag_obj.image) + if manifest is None: + return None + blob_query = model.storage.lookup_repo_storages_by_content_checksum(repo, manifest.checksums) diff --git a/data/registry_model/shared.py b/data/registry_model/shared.py index 16c87b96c..091061526 100644 --- a/data/registry_model/shared.py +++ b/data/registry_model/shared.py @@ -418,7 +418,11 @@ class SharedModel: repo_name = repo.name # Find the v1 metadata for this image and its parents. - parents = model.image.get_parent_images(namespace_name, repo_name, legacy_image_row) + try: + parents = model.image.get_parent_images(namespace_name, repo_name, legacy_image_row) + except model.DataModelException: + logger.exception('Could not load parent images for legacy image %s', legacy_image_row.id) + return None # If the manifest is being generated under the library namespace, then we make its namespace # empty. diff --git a/workers/tagbackfillworker.py b/workers/tagbackfillworker.py index 9e6671183..827f5f9de 100644 --- a/workers/tagbackfillworker.py +++ b/workers/tagbackfillworker.py @@ -225,15 +225,19 @@ def _get_manifest_id(repositorytag): manifest_datatype = pre_oci_model.get_manifest_for_tag(repository_tag_datatype, backfill_if_necessary=True) if manifest_datatype is None: - logger.error('Missing manifest for tag `%s`', repositorytag.id) - return None + logger.error('Could not load or backfill manifest for tag `%s`', repositorytag.id) - # Retrieve the new-style Manifest for the TagManifest, if any. - try: - tag_manifest = TagManifest.get(id=manifest_datatype._db_id) - except TagManifest.DoesNotExist: - logger.exception('Could not find tag manifest') - return None + # Create a broken manifest for the tag. + tag_manifest = TagManifest.create(tag=repositorytag, + digest='BROKEN-%s' % repositorytag.id, + json_data='{}') + else: + # Retrieve the new-style Manifest for the TagManifest, if any. + try: + tag_manifest = TagManifest.get(id=manifest_datatype._db_id) + except TagManifest.DoesNotExist: + logger.exception('Could not find tag manifest') + return None try: found = TagManifestToManifest.get(tag_manifest=tag_manifest).manifest diff --git a/workers/test/test_tagbackfillworker.py b/workers/test/test_tagbackfillworker.py index 430cba3a8..f9615b064 100644 --- a/workers/test/test_tagbackfillworker.py +++ b/workers/test/test_tagbackfillworker.py @@ -2,7 +2,8 @@ from app import docker_v2_signing_key from data import model from data.database import (TagManifestLabelMap, TagManifestToManifest, Manifest, ManifestBlob, ManifestLegacyImage, ManifestLabel, TagManifest, RepositoryTag, Image, - TagManifestLabel, Tag, TagToRepositoryTag, Repository) + TagManifestLabel, Tag, TagToRepositoryTag, Repository, + ImageStorage) from image.docker.schema1 import DockerSchema1ManifestBuilder from workers.tagbackfillworker import backfill_tag, _backfill_manifest @@ -251,3 +252,30 @@ def test_manifestbackfillworker_repeat_digest(clear_rows, initialized_db): map_row2 = TagManifestToManifest.get(tag_manifest=manifest_2) assert map_row1.manifest == map_row2.manifest + + +def test_manifest_backfill_broken_tag(clear_rows, initialized_db): + """ Tests backfilling a broken tag. """ + # Delete existing tag manifest so we can reuse the tag. + TagManifestLabel.delete().execute() + TagManifest.delete().execute() + + # Create a tag with an image referenced missing parent images. + repo = model.repository.get_repository('devtable', 'gargantuan') + broken_image = Image.create(docker_image_id='foo', repository=repo, ancestors='/348723847234/', + storage=ImageStorage.get()) + broken_image_tag = RepositoryTag.create(repository=repo, image=broken_image, name='broken') + + # Backfill the tag. + assert backfill_tag(broken_image_tag) + + # Ensure we backfilled, even though we reference a broken manifest. + tag_manifest = TagManifest.get(tag=broken_image_tag) + + map_row = TagManifestToManifest.get(tag_manifest=tag_manifest) + manifest = map_row.manifest + assert manifest.manifest_bytes == tag_manifest.json_data + + tag = TagToRepositoryTag.get(repository_tag=broken_image_tag).tag + assert tag.name == 'broken' + assert tag.manifest == manifest