Merge pull request #3356 from quay/tag-backfill-skip-improvements

Tag backfill improvements
This commit is contained in:
Joseph Schorr 2019-02-07 13:52:32 -05:00 committed by GitHub
commit 8d722dee81
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 79 additions and 18 deletions

View file

@ -59,7 +59,8 @@ def get_parent_images(namespace_name, repository_name, image_obj):
id_to_image = {unicode(image.id): image for image in parents} id_to_image = {unicode(image.id): image for image in parents}
try: try:
return [id_to_image[parent_id] for parent_id in reversed(parent_db_ids)] return [id_to_image[parent_id] for parent_id in reversed(parent_db_ids)]
except KeyError: except KeyError as ke:
logger.exception('Could not find an expected parent image for image %s', image_obj.id)
raise DataModelException('Unknown parent image') raise DataModelException('Unknown parent image')

View file

@ -799,7 +799,8 @@ def populate_manifest(repository, manifest, legacy_image, storage_ids):
manifest_row = Manifest.create(digest=manifest.digest, repository=repository, manifest_row = Manifest.create(digest=manifest.digest, repository=repository,
manifest_bytes=manifest.bytes.as_encoded_str(), manifest_bytes=manifest.bytes.as_encoded_str(),
media_type=media_type) media_type=media_type)
except IntegrityError: except IntegrityError as ie:
logger.debug('Got integrity error when trying to write manifest: %s', ie)
return Manifest.get(repository=repository, digest=manifest.digest) return Manifest.get(repository=repository, digest=manifest.digest)
ManifestLegacyImage.create(manifest=manifest_row, repository=repository, image=legacy_image) ManifestLegacyImage.create(manifest=manifest_row, repository=repository, image=legacy_image)

View file

@ -478,6 +478,9 @@ class PreOCIModel(SharedModel, RegistryDataInterface):
# Write the manifest to the DB. # Write the manifest to the DB.
manifest = self._build_manifest_for_legacy_image(tag_obj.name, tag_obj.image) manifest = self._build_manifest_for_legacy_image(tag_obj.name, tag_obj.image)
if manifest is None:
return None
blob_query = model.storage.lookup_repo_storages_by_content_checksum(repo, blob_query = model.storage.lookup_repo_storages_by_content_checksum(repo,
manifest.checksums) manifest.checksums)

View file

@ -418,7 +418,11 @@ class SharedModel:
repo_name = repo.name repo_name = repo.name
# Find the v1 metadata for this image and its parents. # Find the v1 metadata for this image and its parents.
parents = model.image.get_parent_images(namespace_name, repo_name, legacy_image_row) try:
parents = model.image.get_parent_images(namespace_name, repo_name, legacy_image_row)
except model.DataModelException:
logger.exception('Could not load parent images for legacy image %s', legacy_image_row.id)
return None
# If the manifest is being generated under the library namespace, then we make its namespace # If the manifest is being generated under the library namespace, then we make its namespace
# empty. # empty.

View file

@ -377,7 +377,11 @@ class DockerSchema1Manifest(ManifestInterface):
metadata_string = history_obj[DOCKER_SCHEMA1_V1_COMPAT_KEY] metadata_string = history_obj[DOCKER_SCHEMA1_V1_COMPAT_KEY]
v1_metadata = json.loads(metadata_string) try:
v1_metadata = json.loads(metadata_string)
except (ValueError, TypeError):
raise MalformedSchema1Manifest('Could not parse metadata string: %s' % metadata_string)
command_list = v1_metadata.get('container_config', {}).get('Cmd', None) command_list = v1_metadata.get('container_config', {}).get('Cmd', None)
command = to_canonical_json(command_list) if command_list else None command = to_canonical_json(command_list) if command_list else None
@ -536,7 +540,7 @@ class DockerSchema1ManifestBuilder(object):
DOCKER_SCHEMA1_BLOB_SUM_KEY: layer_digest, DOCKER_SCHEMA1_BLOB_SUM_KEY: layer_digest,
}) })
self._history.append({ self._history.append({
DOCKER_SCHEMA1_V1_COMPAT_KEY: v1_json_metadata, DOCKER_SCHEMA1_V1_COMPAT_KEY: v1_json_metadata or '{}',
}) })
return self return self

View file

@ -197,3 +197,19 @@ def test_validate_manifest_with_emoji(with_key):
# Ensure the manifest can be reloaded. # Ensure the manifest can be reloaded.
built_bytes = built.bytes.as_encoded_str() built_bytes = built.bytes.as_encoded_str()
DockerSchema1Manifest(Bytes.for_string_or_unicode(built_bytes)) DockerSchema1Manifest(Bytes.for_string_or_unicode(built_bytes))
@pytest.mark.parametrize('with_key', [
None,
docker_v2_signing_key,
])
def test_validate_manifest_with_none_metadata_layer(with_key):
builder = DockerSchema1ManifestBuilder('somenamespace', 'somerepo', 'sometag')
builder.add_layer('sha256:abcde', None)
built = builder.build(with_key, ensure_ascii=False)
built._validate()
# Ensure the manifest can be reloaded.
built_bytes = built.bytes.as_encoded_str()
DockerSchema1Manifest(Bytes.for_string_or_unicode(built_bytes))

View file

@ -134,7 +134,7 @@ class CompletedKeys(object):
def yield_random_entries(batch_query, primary_key_field, batch_size, max_id, min_id=0): def yield_random_entries(batch_query, primary_key_field, batch_size, max_id, min_id=0):
""" This method will yield items from random blocks in the database. We will track metadata """ This method will yield items from random blocks in the database. We will track metadata
about which keys are available for work, and we will complete the backfill when there is no about which keys are available for work, and we will complete the backfill when there is no
more work to be done. The method yields tupes of (candidate, Event), and if the work was more work to be done. The method yields tuples of (candidate, Event), and if the work was
already done by another worker, the caller should set the event. Batch candidates must have already done by another worker, the caller should set the event. Batch candidates must have
an "id" field which can be inspected. an "id" field which can be inspected.
""" """

View file

@ -156,7 +156,7 @@ class TagBackfillWorker(Worker):
for candidate, abt, _ in iterator: for candidate, abt, _ in iterator:
if not backfill_tag(candidate): if not backfill_tag(candidate):
logger.info('Another worker pre-empted us for label: %s', candidate.id) logger.info('Another worker pre-empted us for tag: %s', candidate.id)
abt.set() abt.set()
@ -171,14 +171,14 @@ def lookup_map_row(repositorytag):
def backfill_tag(repositorytag): def backfill_tag(repositorytag):
logger.info('Backfilling tag %s', repositorytag.id) logger.info('Backfilling tag %s', repositorytag.id)
# Ensure that a mapping row doesn't already exist. If it does, we've been preempted. # Ensure that a mapping row doesn't already exist. If it does, nothing more to do.
if lookup_map_row(repositorytag): if lookup_map_row(repositorytag):
return False return False
# Grab the manifest for the RepositoryTag, backfilling as necessary. # Grab the manifest for the RepositoryTag, backfilling as necessary.
manifest_id = _get_manifest_id(repositorytag) manifest_id = _get_manifest_id(repositorytag)
if manifest_id is None: if manifest_id is None:
return False return True
lifetime_start_ms = (repositorytag.lifetime_start_ts * 1000 lifetime_start_ms = (repositorytag.lifetime_start_ts * 1000
if repositorytag.lifetime_start_ts is not None else None) if repositorytag.lifetime_start_ts is not None else None)
@ -225,15 +225,19 @@ def _get_manifest_id(repositorytag):
manifest_datatype = pre_oci_model.get_manifest_for_tag(repository_tag_datatype, manifest_datatype = pre_oci_model.get_manifest_for_tag(repository_tag_datatype,
backfill_if_necessary=True) backfill_if_necessary=True)
if manifest_datatype is None: if manifest_datatype is None:
logger.error('Missing manifest for tag `%s`', repositorytag.id) logger.error('Could not load or backfill manifest for tag `%s`', repositorytag.id)
return None
# Retrieve the new-style Manifest for the TagManifest, if any. # Create a broken manifest for the tag.
try: tag_manifest = TagManifest.create(tag=repositorytag,
tag_manifest = TagManifest.get(id=manifest_datatype._db_id) digest='BROKEN-%s' % repositorytag.id,
except TagManifest.DoesNotExist: json_data='{}')
logger.exception('Could not find tag manifest') else:
return None # Retrieve the new-style Manifest for the TagManifest, if any.
try:
tag_manifest = TagManifest.get(id=manifest_datatype._db_id)
except TagManifest.DoesNotExist:
logger.exception('Could not find tag manifest')
return None
try: try:
found = TagManifestToManifest.get(tag_manifest=tag_manifest).manifest found = TagManifestToManifest.get(tag_manifest=tag_manifest).manifest

View file

@ -2,7 +2,8 @@ from app import docker_v2_signing_key
from data import model from data import model
from data.database import (TagManifestLabelMap, TagManifestToManifest, Manifest, ManifestBlob, from data.database import (TagManifestLabelMap, TagManifestToManifest, Manifest, ManifestBlob,
ManifestLegacyImage, ManifestLabel, TagManifest, RepositoryTag, Image, ManifestLegacyImage, ManifestLabel, TagManifest, RepositoryTag, Image,
TagManifestLabel, Tag, TagToRepositoryTag, Repository) TagManifestLabel, Tag, TagToRepositoryTag, Repository,
ImageStorage)
from image.docker.schema1 import DockerSchema1ManifestBuilder from image.docker.schema1 import DockerSchema1ManifestBuilder
from workers.tagbackfillworker import backfill_tag, _backfill_manifest from workers.tagbackfillworker import backfill_tag, _backfill_manifest
@ -251,3 +252,30 @@ def test_manifestbackfillworker_repeat_digest(clear_rows, initialized_db):
map_row2 = TagManifestToManifest.get(tag_manifest=manifest_2) map_row2 = TagManifestToManifest.get(tag_manifest=manifest_2)
assert map_row1.manifest == map_row2.manifest assert map_row1.manifest == map_row2.manifest
def test_manifest_backfill_broken_tag(clear_rows, initialized_db):
""" Tests backfilling a broken tag. """
# Delete existing tag manifest so we can reuse the tag.
TagManifestLabel.delete().execute()
TagManifest.delete().execute()
# Create a tag with an image referenced missing parent images.
repo = model.repository.get_repository('devtable', 'gargantuan')
broken_image = Image.create(docker_image_id='foo', repository=repo, ancestors='/348723847234/',
storage=ImageStorage.get())
broken_image_tag = RepositoryTag.create(repository=repo, image=broken_image, name='broken')
# Backfill the tag.
assert backfill_tag(broken_image_tag)
# Ensure we backfilled, even though we reference a broken manifest.
tag_manifest = TagManifest.get(tag=broken_image_tag)
map_row = TagManifestToManifest.get(tag_manifest=tag_manifest)
manifest = map_row.manifest
assert manifest.manifest_bytes == tag_manifest.json_data
tag = TagToRepositoryTag.get(repository_tag=broken_image_tag).tag
assert tag.name == 'broken'
assert tag.manifest == manifest