Merge pull request #3356 from quay/tag-backfill-skip-improvements
Tag backfill improvements
This commit is contained in:
commit
8d722dee81
9 changed files with 79 additions and 18 deletions
|
@ -59,7 +59,8 @@ def get_parent_images(namespace_name, repository_name, image_obj):
|
||||||
id_to_image = {unicode(image.id): image for image in parents}
|
id_to_image = {unicode(image.id): image for image in parents}
|
||||||
try:
|
try:
|
||||||
return [id_to_image[parent_id] for parent_id in reversed(parent_db_ids)]
|
return [id_to_image[parent_id] for parent_id in reversed(parent_db_ids)]
|
||||||
except KeyError:
|
except KeyError as ke:
|
||||||
|
logger.exception('Could not find an expected parent image for image %s', image_obj.id)
|
||||||
raise DataModelException('Unknown parent image')
|
raise DataModelException('Unknown parent image')
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -799,7 +799,8 @@ def populate_manifest(repository, manifest, legacy_image, storage_ids):
|
||||||
manifest_row = Manifest.create(digest=manifest.digest, repository=repository,
|
manifest_row = Manifest.create(digest=manifest.digest, repository=repository,
|
||||||
manifest_bytes=manifest.bytes.as_encoded_str(),
|
manifest_bytes=manifest.bytes.as_encoded_str(),
|
||||||
media_type=media_type)
|
media_type=media_type)
|
||||||
except IntegrityError:
|
except IntegrityError as ie:
|
||||||
|
logger.debug('Got integrity error when trying to write manifest: %s', ie)
|
||||||
return Manifest.get(repository=repository, digest=manifest.digest)
|
return Manifest.get(repository=repository, digest=manifest.digest)
|
||||||
|
|
||||||
ManifestLegacyImage.create(manifest=manifest_row, repository=repository, image=legacy_image)
|
ManifestLegacyImage.create(manifest=manifest_row, repository=repository, image=legacy_image)
|
||||||
|
|
|
@ -478,6 +478,9 @@ class PreOCIModel(SharedModel, RegistryDataInterface):
|
||||||
|
|
||||||
# Write the manifest to the DB.
|
# Write the manifest to the DB.
|
||||||
manifest = self._build_manifest_for_legacy_image(tag_obj.name, tag_obj.image)
|
manifest = self._build_manifest_for_legacy_image(tag_obj.name, tag_obj.image)
|
||||||
|
if manifest is None:
|
||||||
|
return None
|
||||||
|
|
||||||
blob_query = model.storage.lookup_repo_storages_by_content_checksum(repo,
|
blob_query = model.storage.lookup_repo_storages_by_content_checksum(repo,
|
||||||
manifest.checksums)
|
manifest.checksums)
|
||||||
|
|
||||||
|
|
|
@ -418,7 +418,11 @@ class SharedModel:
|
||||||
repo_name = repo.name
|
repo_name = repo.name
|
||||||
|
|
||||||
# Find the v1 metadata for this image and its parents.
|
# Find the v1 metadata for this image and its parents.
|
||||||
parents = model.image.get_parent_images(namespace_name, repo_name, legacy_image_row)
|
try:
|
||||||
|
parents = model.image.get_parent_images(namespace_name, repo_name, legacy_image_row)
|
||||||
|
except model.DataModelException:
|
||||||
|
logger.exception('Could not load parent images for legacy image %s', legacy_image_row.id)
|
||||||
|
return None
|
||||||
|
|
||||||
# If the manifest is being generated under the library namespace, then we make its namespace
|
# If the manifest is being generated under the library namespace, then we make its namespace
|
||||||
# empty.
|
# empty.
|
||||||
|
|
|
@ -377,7 +377,11 @@ class DockerSchema1Manifest(ManifestInterface):
|
||||||
|
|
||||||
metadata_string = history_obj[DOCKER_SCHEMA1_V1_COMPAT_KEY]
|
metadata_string = history_obj[DOCKER_SCHEMA1_V1_COMPAT_KEY]
|
||||||
|
|
||||||
v1_metadata = json.loads(metadata_string)
|
try:
|
||||||
|
v1_metadata = json.loads(metadata_string)
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
raise MalformedSchema1Manifest('Could not parse metadata string: %s' % metadata_string)
|
||||||
|
|
||||||
command_list = v1_metadata.get('container_config', {}).get('Cmd', None)
|
command_list = v1_metadata.get('container_config', {}).get('Cmd', None)
|
||||||
command = to_canonical_json(command_list) if command_list else None
|
command = to_canonical_json(command_list) if command_list else None
|
||||||
|
|
||||||
|
@ -536,7 +540,7 @@ class DockerSchema1ManifestBuilder(object):
|
||||||
DOCKER_SCHEMA1_BLOB_SUM_KEY: layer_digest,
|
DOCKER_SCHEMA1_BLOB_SUM_KEY: layer_digest,
|
||||||
})
|
})
|
||||||
self._history.append({
|
self._history.append({
|
||||||
DOCKER_SCHEMA1_V1_COMPAT_KEY: v1_json_metadata,
|
DOCKER_SCHEMA1_V1_COMPAT_KEY: v1_json_metadata or '{}',
|
||||||
})
|
})
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
|
|
@ -197,3 +197,19 @@ def test_validate_manifest_with_emoji(with_key):
|
||||||
# Ensure the manifest can be reloaded.
|
# Ensure the manifest can be reloaded.
|
||||||
built_bytes = built.bytes.as_encoded_str()
|
built_bytes = built.bytes.as_encoded_str()
|
||||||
DockerSchema1Manifest(Bytes.for_string_or_unicode(built_bytes))
|
DockerSchema1Manifest(Bytes.for_string_or_unicode(built_bytes))
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('with_key', [
|
||||||
|
None,
|
||||||
|
docker_v2_signing_key,
|
||||||
|
])
|
||||||
|
def test_validate_manifest_with_none_metadata_layer(with_key):
|
||||||
|
builder = DockerSchema1ManifestBuilder('somenamespace', 'somerepo', 'sometag')
|
||||||
|
builder.add_layer('sha256:abcde', None)
|
||||||
|
|
||||||
|
built = builder.build(with_key, ensure_ascii=False)
|
||||||
|
built._validate()
|
||||||
|
|
||||||
|
# Ensure the manifest can be reloaded.
|
||||||
|
built_bytes = built.bytes.as_encoded_str()
|
||||||
|
DockerSchema1Manifest(Bytes.for_string_or_unicode(built_bytes))
|
||||||
|
|
|
@ -134,7 +134,7 @@ class CompletedKeys(object):
|
||||||
def yield_random_entries(batch_query, primary_key_field, batch_size, max_id, min_id=0):
|
def yield_random_entries(batch_query, primary_key_field, batch_size, max_id, min_id=0):
|
||||||
""" This method will yield items from random blocks in the database. We will track metadata
|
""" This method will yield items from random blocks in the database. We will track metadata
|
||||||
about which keys are available for work, and we will complete the backfill when there is no
|
about which keys are available for work, and we will complete the backfill when there is no
|
||||||
more work to be done. The method yields tupes of (candidate, Event), and if the work was
|
more work to be done. The method yields tuples of (candidate, Event), and if the work was
|
||||||
already done by another worker, the caller should set the event. Batch candidates must have
|
already done by another worker, the caller should set the event. Batch candidates must have
|
||||||
an "id" field which can be inspected.
|
an "id" field which can be inspected.
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -156,7 +156,7 @@ class TagBackfillWorker(Worker):
|
||||||
|
|
||||||
for candidate, abt, _ in iterator:
|
for candidate, abt, _ in iterator:
|
||||||
if not backfill_tag(candidate):
|
if not backfill_tag(candidate):
|
||||||
logger.info('Another worker pre-empted us for label: %s', candidate.id)
|
logger.info('Another worker pre-empted us for tag: %s', candidate.id)
|
||||||
abt.set()
|
abt.set()
|
||||||
|
|
||||||
|
|
||||||
|
@ -171,14 +171,14 @@ def lookup_map_row(repositorytag):
|
||||||
def backfill_tag(repositorytag):
|
def backfill_tag(repositorytag):
|
||||||
logger.info('Backfilling tag %s', repositorytag.id)
|
logger.info('Backfilling tag %s', repositorytag.id)
|
||||||
|
|
||||||
# Ensure that a mapping row doesn't already exist. If it does, we've been preempted.
|
# Ensure that a mapping row doesn't already exist. If it does, nothing more to do.
|
||||||
if lookup_map_row(repositorytag):
|
if lookup_map_row(repositorytag):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# Grab the manifest for the RepositoryTag, backfilling as necessary.
|
# Grab the manifest for the RepositoryTag, backfilling as necessary.
|
||||||
manifest_id = _get_manifest_id(repositorytag)
|
manifest_id = _get_manifest_id(repositorytag)
|
||||||
if manifest_id is None:
|
if manifest_id is None:
|
||||||
return False
|
return True
|
||||||
|
|
||||||
lifetime_start_ms = (repositorytag.lifetime_start_ts * 1000
|
lifetime_start_ms = (repositorytag.lifetime_start_ts * 1000
|
||||||
if repositorytag.lifetime_start_ts is not None else None)
|
if repositorytag.lifetime_start_ts is not None else None)
|
||||||
|
@ -225,15 +225,19 @@ def _get_manifest_id(repositorytag):
|
||||||
manifest_datatype = pre_oci_model.get_manifest_for_tag(repository_tag_datatype,
|
manifest_datatype = pre_oci_model.get_manifest_for_tag(repository_tag_datatype,
|
||||||
backfill_if_necessary=True)
|
backfill_if_necessary=True)
|
||||||
if manifest_datatype is None:
|
if manifest_datatype is None:
|
||||||
logger.error('Missing manifest for tag `%s`', repositorytag.id)
|
logger.error('Could not load or backfill manifest for tag `%s`', repositorytag.id)
|
||||||
return None
|
|
||||||
|
|
||||||
# Retrieve the new-style Manifest for the TagManifest, if any.
|
# Create a broken manifest for the tag.
|
||||||
try:
|
tag_manifest = TagManifest.create(tag=repositorytag,
|
||||||
tag_manifest = TagManifest.get(id=manifest_datatype._db_id)
|
digest='BROKEN-%s' % repositorytag.id,
|
||||||
except TagManifest.DoesNotExist:
|
json_data='{}')
|
||||||
logger.exception('Could not find tag manifest')
|
else:
|
||||||
return None
|
# Retrieve the new-style Manifest for the TagManifest, if any.
|
||||||
|
try:
|
||||||
|
tag_manifest = TagManifest.get(id=manifest_datatype._db_id)
|
||||||
|
except TagManifest.DoesNotExist:
|
||||||
|
logger.exception('Could not find tag manifest')
|
||||||
|
return None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
found = TagManifestToManifest.get(tag_manifest=tag_manifest).manifest
|
found = TagManifestToManifest.get(tag_manifest=tag_manifest).manifest
|
||||||
|
|
|
@ -2,7 +2,8 @@ from app import docker_v2_signing_key
|
||||||
from data import model
|
from data import model
|
||||||
from data.database import (TagManifestLabelMap, TagManifestToManifest, Manifest, ManifestBlob,
|
from data.database import (TagManifestLabelMap, TagManifestToManifest, Manifest, ManifestBlob,
|
||||||
ManifestLegacyImage, ManifestLabel, TagManifest, RepositoryTag, Image,
|
ManifestLegacyImage, ManifestLabel, TagManifest, RepositoryTag, Image,
|
||||||
TagManifestLabel, Tag, TagToRepositoryTag, Repository)
|
TagManifestLabel, Tag, TagToRepositoryTag, Repository,
|
||||||
|
ImageStorage)
|
||||||
from image.docker.schema1 import DockerSchema1ManifestBuilder
|
from image.docker.schema1 import DockerSchema1ManifestBuilder
|
||||||
from workers.tagbackfillworker import backfill_tag, _backfill_manifest
|
from workers.tagbackfillworker import backfill_tag, _backfill_manifest
|
||||||
|
|
||||||
|
@ -251,3 +252,30 @@ def test_manifestbackfillworker_repeat_digest(clear_rows, initialized_db):
|
||||||
map_row2 = TagManifestToManifest.get(tag_manifest=manifest_2)
|
map_row2 = TagManifestToManifest.get(tag_manifest=manifest_2)
|
||||||
|
|
||||||
assert map_row1.manifest == map_row2.manifest
|
assert map_row1.manifest == map_row2.manifest
|
||||||
|
|
||||||
|
|
||||||
|
def test_manifest_backfill_broken_tag(clear_rows, initialized_db):
|
||||||
|
""" Tests backfilling a broken tag. """
|
||||||
|
# Delete existing tag manifest so we can reuse the tag.
|
||||||
|
TagManifestLabel.delete().execute()
|
||||||
|
TagManifest.delete().execute()
|
||||||
|
|
||||||
|
# Create a tag with an image referenced missing parent images.
|
||||||
|
repo = model.repository.get_repository('devtable', 'gargantuan')
|
||||||
|
broken_image = Image.create(docker_image_id='foo', repository=repo, ancestors='/348723847234/',
|
||||||
|
storage=ImageStorage.get())
|
||||||
|
broken_image_tag = RepositoryTag.create(repository=repo, image=broken_image, name='broken')
|
||||||
|
|
||||||
|
# Backfill the tag.
|
||||||
|
assert backfill_tag(broken_image_tag)
|
||||||
|
|
||||||
|
# Ensure we backfilled, even though we reference a broken manifest.
|
||||||
|
tag_manifest = TagManifest.get(tag=broken_image_tag)
|
||||||
|
|
||||||
|
map_row = TagManifestToManifest.get(tag_manifest=tag_manifest)
|
||||||
|
manifest = map_row.manifest
|
||||||
|
assert manifest.manifest_bytes == tag_manifest.json_data
|
||||||
|
|
||||||
|
tag = TagToRepositoryTag.get(repository_tag=broken_image_tag).tag
|
||||||
|
assert tag.name == 'broken'
|
||||||
|
assert tag.manifest == manifest
|
||||||
|
|
Reference in a new issue