diff --git a/data/model/image.py b/data/model/image.py index 972067514..1615dd44d 100644 --- a/data/model/image.py +++ b/data/model/image.py @@ -59,7 +59,8 @@ def get_parent_images(namespace_name, repository_name, image_obj): id_to_image = {unicode(image.id): image for image in parents} try: return [id_to_image[parent_id] for parent_id in reversed(parent_db_ids)] - except KeyError: + except KeyError as ke: + logger.exception('Could not find an expected parent image for image %s', image_obj.id) raise DataModelException('Unknown parent image') diff --git a/data/model/tag.py b/data/model/tag.py index 345a0a391..e22156dbf 100644 --- a/data/model/tag.py +++ b/data/model/tag.py @@ -799,7 +799,8 @@ def populate_manifest(repository, manifest, legacy_image, storage_ids): manifest_row = Manifest.create(digest=manifest.digest, repository=repository, manifest_bytes=manifest.bytes.as_encoded_str(), media_type=media_type) - except IntegrityError: + except IntegrityError as ie: + logger.debug('Got integrity error when trying to write manifest: %s', ie) return Manifest.get(repository=repository, digest=manifest.digest) ManifestLegacyImage.create(manifest=manifest_row, repository=repository, image=legacy_image) diff --git a/data/registry_model/registry_pre_oci_model.py b/data/registry_model/registry_pre_oci_model.py index 1a5094ae6..62cd1d31e 100644 --- a/data/registry_model/registry_pre_oci_model.py +++ b/data/registry_model/registry_pre_oci_model.py @@ -478,6 +478,9 @@ class PreOCIModel(SharedModel, RegistryDataInterface): # Write the manifest to the DB. manifest = self._build_manifest_for_legacy_image(tag_obj.name, tag_obj.image) + if manifest is None: + return None + blob_query = model.storage.lookup_repo_storages_by_content_checksum(repo, manifest.checksums) diff --git a/data/registry_model/shared.py b/data/registry_model/shared.py index 16c87b96c..091061526 100644 --- a/data/registry_model/shared.py +++ b/data/registry_model/shared.py @@ -418,7 +418,11 @@ class SharedModel: repo_name = repo.name # Find the v1 metadata for this image and its parents. - parents = model.image.get_parent_images(namespace_name, repo_name, legacy_image_row) + try: + parents = model.image.get_parent_images(namespace_name, repo_name, legacy_image_row) + except model.DataModelException: + logger.exception('Could not load parent images for legacy image %s', legacy_image_row.id) + return None # If the manifest is being generated under the library namespace, then we make its namespace # empty. diff --git a/image/docker/schema1.py b/image/docker/schema1.py index 6f8660e61..337c9547d 100644 --- a/image/docker/schema1.py +++ b/image/docker/schema1.py @@ -377,7 +377,11 @@ class DockerSchema1Manifest(ManifestInterface): metadata_string = history_obj[DOCKER_SCHEMA1_V1_COMPAT_KEY] - v1_metadata = json.loads(metadata_string) + try: + v1_metadata = json.loads(metadata_string) + except (ValueError, TypeError): + raise MalformedSchema1Manifest('Could not parse metadata string: %s' % metadata_string) + command_list = v1_metadata.get('container_config', {}).get('Cmd', None) command = to_canonical_json(command_list) if command_list else None @@ -536,7 +540,7 @@ class DockerSchema1ManifestBuilder(object): DOCKER_SCHEMA1_BLOB_SUM_KEY: layer_digest, }) self._history.append({ - DOCKER_SCHEMA1_V1_COMPAT_KEY: v1_json_metadata, + DOCKER_SCHEMA1_V1_COMPAT_KEY: v1_json_metadata or '{}', }) return self diff --git a/image/docker/test/test_schema1.py b/image/docker/test/test_schema1.py index 4392ae8cb..0fd473b6a 100644 --- a/image/docker/test/test_schema1.py +++ b/image/docker/test/test_schema1.py @@ -197,3 +197,19 @@ def test_validate_manifest_with_emoji(with_key): # Ensure the manifest can be reloaded. built_bytes = built.bytes.as_encoded_str() DockerSchema1Manifest(Bytes.for_string_or_unicode(built_bytes)) + + +@pytest.mark.parametrize('with_key', [ + None, + docker_v2_signing_key, +]) +def test_validate_manifest_with_none_metadata_layer(with_key): + builder = DockerSchema1ManifestBuilder('somenamespace', 'somerepo', 'sometag') + builder.add_layer('sha256:abcde', None) + + built = builder.build(with_key, ensure_ascii=False) + built._validate() + + # Ensure the manifest can be reloaded. + built_bytes = built.bytes.as_encoded_str() + DockerSchema1Manifest(Bytes.for_string_or_unicode(built_bytes)) diff --git a/util/migrate/allocator.py b/util/migrate/allocator.py index d37abb6e9..44f4b59e4 100644 --- a/util/migrate/allocator.py +++ b/util/migrate/allocator.py @@ -134,7 +134,7 @@ class CompletedKeys(object): def yield_random_entries(batch_query, primary_key_field, batch_size, max_id, min_id=0): """ This method will yield items from random blocks in the database. We will track metadata about which keys are available for work, and we will complete the backfill when there is no - more work to be done. The method yields tupes of (candidate, Event), and if the work was + more work to be done. The method yields tuples of (candidate, Event), and if the work was already done by another worker, the caller should set the event. Batch candidates must have an "id" field which can be inspected. """ diff --git a/workers/tagbackfillworker.py b/workers/tagbackfillworker.py index fa5f5b8d0..827f5f9de 100644 --- a/workers/tagbackfillworker.py +++ b/workers/tagbackfillworker.py @@ -156,7 +156,7 @@ class TagBackfillWorker(Worker): for candidate, abt, _ in iterator: if not backfill_tag(candidate): - logger.info('Another worker pre-empted us for label: %s', candidate.id) + logger.info('Another worker pre-empted us for tag: %s', candidate.id) abt.set() @@ -171,14 +171,14 @@ def lookup_map_row(repositorytag): def backfill_tag(repositorytag): logger.info('Backfilling tag %s', repositorytag.id) - # Ensure that a mapping row doesn't already exist. If it does, we've been preempted. + # Ensure that a mapping row doesn't already exist. If it does, nothing more to do. if lookup_map_row(repositorytag): return False # Grab the manifest for the RepositoryTag, backfilling as necessary. manifest_id = _get_manifest_id(repositorytag) if manifest_id is None: - return False + return True lifetime_start_ms = (repositorytag.lifetime_start_ts * 1000 if repositorytag.lifetime_start_ts is not None else None) @@ -225,15 +225,19 @@ def _get_manifest_id(repositorytag): manifest_datatype = pre_oci_model.get_manifest_for_tag(repository_tag_datatype, backfill_if_necessary=True) if manifest_datatype is None: - logger.error('Missing manifest for tag `%s`', repositorytag.id) - return None + logger.error('Could not load or backfill manifest for tag `%s`', repositorytag.id) - # Retrieve the new-style Manifest for the TagManifest, if any. - try: - tag_manifest = TagManifest.get(id=manifest_datatype._db_id) - except TagManifest.DoesNotExist: - logger.exception('Could not find tag manifest') - return None + # Create a broken manifest for the tag. + tag_manifest = TagManifest.create(tag=repositorytag, + digest='BROKEN-%s' % repositorytag.id, + json_data='{}') + else: + # Retrieve the new-style Manifest for the TagManifest, if any. + try: + tag_manifest = TagManifest.get(id=manifest_datatype._db_id) + except TagManifest.DoesNotExist: + logger.exception('Could not find tag manifest') + return None try: found = TagManifestToManifest.get(tag_manifest=tag_manifest).manifest diff --git a/workers/test/test_tagbackfillworker.py b/workers/test/test_tagbackfillworker.py index 430cba3a8..f9615b064 100644 --- a/workers/test/test_tagbackfillworker.py +++ b/workers/test/test_tagbackfillworker.py @@ -2,7 +2,8 @@ from app import docker_v2_signing_key from data import model from data.database import (TagManifestLabelMap, TagManifestToManifest, Manifest, ManifestBlob, ManifestLegacyImage, ManifestLabel, TagManifest, RepositoryTag, Image, - TagManifestLabel, Tag, TagToRepositoryTag, Repository) + TagManifestLabel, Tag, TagToRepositoryTag, Repository, + ImageStorage) from image.docker.schema1 import DockerSchema1ManifestBuilder from workers.tagbackfillworker import backfill_tag, _backfill_manifest @@ -251,3 +252,30 @@ def test_manifestbackfillworker_repeat_digest(clear_rows, initialized_db): map_row2 = TagManifestToManifest.get(tag_manifest=manifest_2) assert map_row1.manifest == map_row2.manifest + + +def test_manifest_backfill_broken_tag(clear_rows, initialized_db): + """ Tests backfilling a broken tag. """ + # Delete existing tag manifest so we can reuse the tag. + TagManifestLabel.delete().execute() + TagManifest.delete().execute() + + # Create a tag with an image referenced missing parent images. + repo = model.repository.get_repository('devtable', 'gargantuan') + broken_image = Image.create(docker_image_id='foo', repository=repo, ancestors='/348723847234/', + storage=ImageStorage.get()) + broken_image_tag = RepositoryTag.create(repository=repo, image=broken_image, name='broken') + + # Backfill the tag. + assert backfill_tag(broken_image_tag) + + # Ensure we backfilled, even though we reference a broken manifest. + tag_manifest = TagManifest.get(tag=broken_image_tag) + + map_row = TagManifestToManifest.get(tag_manifest=tag_manifest) + manifest = map_row.manifest + assert manifest.manifest_bytes == tag_manifest.json_data + + tag = TagToRepositoryTag.get(repository_tag=broken_image_tag).tag + assert tag.name == 'broken' + assert tag.manifest == manifest