Merge pull request #3302 from quay/joseph.schorr/QUAY-1017/tag-backfill

Tag backfill worker
This commit is contained in:
Joseph Schorr 2018-12-11 13:31:35 -05:00 committed by GitHub
commit be2cece7b0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 220 additions and 67 deletions

View file

@ -1,4 +0,0 @@
#!/bin/sh
# Start the logger: replace this shell with logger(1), which reads lines from standard input and
# writes them to the system log. Each entry is tagged "manifestbackfillworker" (-t) and carries
# the logger process ID (-i).
exec logger -i -t manifestbackfillworker

View file

@ -1,9 +0,0 @@
#! /bin/bash

# Launches the manifest backfill worker.
#   QUAYPATH  value exported as PYTHONPATH for the worker process (default: ".")
#   QUAYDIR   directory to run from; venv/bin/python is resolved relative to it (default: "/")

echo 'Starting manifest backfill worker'

QUAYPATH=${QUAYPATH:-"."}

# Quote the expansion so a directory containing whitespace or glob characters is passed to `cd`
# as a single argument, and stop if the directory cannot be entered rather than trying to start
# the interpreter from whatever directory we happen to be in.
cd "${QUAYDIR:-/}" || exit 1

# stderr is folded into stdout so both streams reach the same consumer.
PYTHONPATH="$QUAYPATH" venv/bin/python -m workers.manifestbackfillworker 2>&1

echo 'Repository manifest backfill exited'

View file

@ -0,0 +1,4 @@
#!/bin/sh
# Start the logger: replace this shell with logger(1), which reads lines from standard input and
# writes them to the system log. Each entry is tagged "tagbackfillworker" (-t) and carries the
# logger process ID (-i).
exec logger -i -t tagbackfillworker

View file

@ -0,0 +1,9 @@
#! /bin/bash

# Launches the tag backfill worker.
#   QUAYPATH  value exported as PYTHONPATH for the worker process (default: ".")
#   QUAYDIR   directory to run from; venv/bin/python is resolved relative to it (default: "/")

echo 'Starting tag backfill worker'

QUAYPATH=${QUAYPATH:-"."}

# Quote the expansion so a directory containing whitespace or glob characters is passed to `cd`
# as a single argument, and stop if the directory cannot be entered rather than trying to start
# the interpreter from whatever directory we happen to be in.
cd "${QUAYDIR:-/}" || exit 1

# stderr is folded into stdout so both streams reach the same consumer.
PYTHONPATH="$QUAYPATH" venv/bin/python -m workers.tagbackfillworker 2>&1

echo 'Repository tag backfill exited'

View file

@ -64,7 +64,7 @@ class PreOCIModel(SharedModel, RegistryDataInterface):
if backfill_if_necessary:
return self.backfill_manifest_for_tag(tag)
return
return None
return Manifest.for_tag_manifest(tag_manifest)

View file

@ -1,20 +1,23 @@
import logging
import logging.config
import time
import time
from peewee import JOIN, fn, IntegrityError
from app import app
from data.database import (UseThenDisconnect, TagManifest, TagManifestToManifest, Image,
Manifest, db_transaction)
from data.database import (UseThenDisconnect, TagToRepositoryTag, RepositoryTag,
TagManifestToManifest, Tag, TagManifest, TagManifestToManifest, Image,
Manifest, TagManifestLabel, ManifestLabel, TagManifestLabelMap, db_transaction)
from data.model import DataModelException
from data.model.image import get_parent_images
from data.model.tag import populate_manifest
from data.model.blob import get_repo_blob_by_digest, BlobDoesNotExist
from data.registry_model import pre_oci_model
from data.registry_model.datatypes import Tag as TagDataType
from image.docker.schema1 import (DockerSchema1Manifest, ManifestException, ManifestInterface,
DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE)
from workers.worker import Worker
from util.log import logfile_path
from util.migrate.allocator import yield_random_entries
@ -23,6 +26,7 @@ logger = logging.getLogger(__name__)
WORKER_TIMEOUT = 600
class BrokenManifest(ManifestInterface):
""" Implementation of the ManifestInterface for "broken" manifests. This allows us to add the
new manifest row while not adding any additional rows for it.
@ -98,54 +102,104 @@ class BrokenManifest(ManifestInterface):
def get_requires_empty_layer_blob(self, content_retriever):
return False
def convert_manifest(self, media_types, namespace_name, repo_name, tag_name, lookup_fn):
def convert_manifest(self, allowed_mediatypes, namespace_name, repo_name, tag_name,
content_retriever):
return None
class ManifestBackfillWorker(Worker):
class TagBackfillWorker(Worker):
def __init__(self):
super(ManifestBackfillWorker, self).__init__()
self.add_operation(self._backfill_manifests, WORKER_TIMEOUT)
super(TagBackfillWorker, self).__init__()
self.add_operation(self._backfill_tags, WORKER_TIMEOUT)
def _candidates_to_backfill(self):
def missing_tmt_query():
return (TagManifest
return (RepositoryTag
.select()
.join(TagManifestToManifest, JOIN.LEFT_OUTER)
.where(TagManifestToManifest.id >> None))
.join(TagToRepositoryTag, JOIN.LEFT_OUTER)
.where(TagToRepositoryTag.id >> None, RepositoryTag.hidden == False))
min_id = (TagManifest
.select(fn.Min(TagManifest.id))
.join(TagManifestToManifest, JOIN.LEFT_OUTER)
.where(TagManifestToManifest.id >> None)
min_id = (RepositoryTag
.select(fn.Min(RepositoryTag.id))
.join(TagToRepositoryTag, JOIN.LEFT_OUTER)
.where(TagToRepositoryTag.id >> None, RepositoryTag.hidden == False)
.scalar())
max_id = TagManifest.select(fn.Max(TagManifest.id)).scalar()
max_id = RepositoryTag.select(fn.Max(RepositoryTag.id)).scalar()
iterator = yield_random_entries(
missing_tmt_query,
TagManifest.id,
100,
RepositoryTag.id,
1000,
max_id,
min_id,
)
return iterator
def _backfill_manifests(self):
def _backfill_tags(self):
with UseThenDisconnect(app.config):
iterator = self._candidates_to_backfill()
if iterator is None:
logger.debug('Found no additional manifest to backfill')
logger.debug('Found no additional tags to backfill')
time.sleep(10000)
return None
for candidate, abt, _ in iterator:
if not backfill_manifest(candidate):
logger.info('Another worker pre-empted us for manifest: %s', candidate.id)
if not backfill_tag(candidate):
logger.info('Another worker pre-empted us for label: %s', candidate.id)
abt.set()
def lookup_map_row(tag_manifest):
def lookup_map_row(repositorytag):
  """ Returns True if a TagToRepositoryTag row already references the given RepositoryTag and
      False otherwise.
  """
  try:
    TagToRepositoryTag.get(repository_tag=repositorytag)
  except TagToRepositoryTag.DoesNotExist:
    # No mapping row yet.
    return False
  else:
    return True
def backfill_tag(repositorytag):
  """ Backfills a new-style Tag row for the given RepositoryTag, together with the
      TagToRepositoryTag row that maps the two.

      Returns True if this call created the rows. Returns False if a mapping row already exists
      (another worker got there first), if no manifest could be resolved for the tag, or if
      creating the rows raised an IntegrityError.
  """
  logger.info('Backfilling tag %s', repositorytag.id)

  # Ensure that a mapping row doesn't already exist. If it does, we've been preempted.
  if lookup_map_row(repositorytag):
    return False

  # Grab the manifest for the RepositoryTag, backfilling if necessary.
  manifest_id = _get_manifest_id(repositorytag)
  if manifest_id is None:
    return False

  # Scale the `*_ts` values by 1000 for the `*_ms` columns. The comparison is against None
  # rather than a truthiness test, so that a timestamp of 0 is converted instead of being
  # silently replaced by None.
  lifetime_start_ms = (repositorytag.lifetime_start_ts * 1000
                       if repositorytag.lifetime_start_ts is not None else None)
  lifetime_end_ms = (repositorytag.lifetime_end_ts * 1000
                     if repositorytag.lifetime_end_ts is not None else None)

  # Create the new Tag and its mapping row. The IntegrityError handler wraps the transaction
  # block (instead of sitting inside it) so that the exception leaves the block before being
  # handled; a Tag row created just before a failing mapping insert is then not committed on its
  # own.
  # NOTE(review): this relies on db_transaction() rolling back when an exception propagates out
  # of it, as peewee's transaction/atomic context managers do -- confirm for this project's
  # transaction factory.
  try:
    with db_transaction():
      # Re-check inside the transaction, since another worker may have created the mapping while
      # the manifest was being resolved.
      if lookup_map_row(repositorytag):
        return False

      created = Tag.create(name=repositorytag.name,
                           repository=repositorytag.repository,
                           lifetime_start_ms=lifetime_start_ms,
                           lifetime_end_ms=lifetime_end_ms,
                           reversion=repositorytag.reversion,
                           manifest=manifest_id,
                           tag_kind=Tag.tag_kind.get_id('tag'))

      TagToRepositoryTag.create(tag=created, repository_tag=repositorytag,
                                repository=repositorytag.repository)
  except IntegrityError:
    logger.exception('Could not create tag for repo tag `%s`', repositorytag.id)
    return False

  logger.info('Backfilled tag %s', repositorytag.id)
  return True
def lookup_manifest_map_row(tag_manifest):
try:
TagManifestToManifest.get(tag_manifest=tag_manifest)
return True
@ -153,11 +207,42 @@ def lookup_map_row(tag_manifest):
return False
def backfill_manifest(tag_manifest):
logger.info('Backfilling manifest %s', tag_manifest.id)
def _get_manifest_id(repositorytag):
# Resolves the id of the new-style Manifest row for the given RepositoryTag. Returns the
# `manifest_id` stored on the TagManifestToManifest mapping row, or None when no manifest is
# found for the tag, the TagManifest row cannot be loaded, or no mapping row exists even after
# a backfill attempt.
# NOTE(review): leading indentation is not visible in this view, so how much of the function
# runs inside the `with db_transaction():` block below cannot be confirmed from here.
repository_tag_datatype = TagDataType.for_repository_tag(repositorytag)
# Retrieve the TagManifest for the RepositoryTag, backfilling if necessary.
with db_transaction():
manifest_datatype = pre_oci_model.get_manifest_for_tag(repository_tag_datatype,
backfill_if_necessary=True)
if manifest_datatype is None:
logger.error('Missing manifest for tag `%s`', repositorytag.id)
return None
# Retrieve the new-style Manifest for the TagManifest, if any.
# The datatype's `_db_id` is used as the primary key of the TagManifest row to load.
try:
tag_manifest = TagManifest.get(id=manifest_datatype._db_id)
except TagManifest.DoesNotExist:
logger.exception('Could not find tag manifest')
return None
# First attempt: an existing mapping row already points at the new-style manifest.
try:
return TagManifestToManifest.get(tag_manifest=tag_manifest).manifest_id
except TagManifestToManifest.DoesNotExist:
# Could not find the new style manifest, so backfill.
# The return value of _backfill_manifest is not inspected; the lookup below decides the result.
_backfill_manifest(tag_manifest)
# Try to retrieve the manifest again, since we've performed a backfill.
try:
return TagManifestToManifest.get(tag_manifest=tag_manifest).manifest_id
except TagManifestToManifest.DoesNotExist:
return None
def _backfill_manifest(tag_manifest):
logger.info('Backfilling manifest for tag manifest %s', tag_manifest.id)
# Ensure that a mapping row doesn't already exist. If it does, we've been preempted.
if lookup_map_row(tag_manifest):
if lookup_manifest_map_row(tag_manifest):
return False
# Parse the manifest. If we cannot parse, then we treat the manifest as broken and just emit it
@ -214,7 +299,7 @@ def backfill_manifest(tag_manifest):
return True
# Ensure it wasn't already created.
if lookup_map_row(tag_manifest):
if lookup_manifest_map_row(tag_manifest):
return False
# Check for a pre-existing manifest matching the digest in the repository. This can happen
@ -235,18 +320,46 @@ def backfill_manifest(tag_manifest):
try:
TagManifestToManifest.create(tag_manifest=tag_manifest, manifest=manifest_row,
broken=is_broken)
return True
except IntegrityError:
return False
# Backfill any labels on the manifest.
_backfill_labels(tag_manifest, manifest_row, repository)
return True
def _backfill_labels(tag_manifest, manifest, repository):
  """ Copies the labels attached to the given TagManifest onto the given Manifest.

      For every TagManifestLabel annotating `tag_manifest` that has no TagManifestLabelMap row
      yet, a ManifestLabel row (under `manifest` and `repository`) and the TagManifestLabelMap
      row linking old and new are created. Labels whose rows fail to insert with an
      IntegrityError are skipped.
  """
  annotated_query = TagManifestLabel.select().where(TagManifestLabel.annotated == tag_manifest)

  # Materialize the rows up front; an empty result simply means the loop body never runs.
  for tml in list(annotated_query):
    label = tml.label

    # Skip labels that already have a mapping row.
    try:
      TagManifestLabelMap.get(tag_manifest_label=tml)
    except TagManifestLabelMap.DoesNotExist:
      pass
    else:
      continue

    try:
      new_manifest_label = ManifestLabel.create(manifest=manifest, label=label,
                                                repository=repository)
      TagManifestLabelMap.create(manifest_label=new_manifest_label,
                                 tag_manifest_label=tml,
                                 label=label,
                                 manifest=manifest,
                                 tag_manifest=tml.annotated)
    except IntegrityError:
      continue
if __name__ == "__main__":
logging.config.fileConfig(logfile_path(debug=False), disable_existing_loggers=False)
if not app.config.get('BACKFILL_TAG_MANIFESTS', False):
logger.debug('Manifest backfill disabled; skipping')
if not app.config.get('BACKFILL_TAGS', False):
logger.debug('Tag backfill disabled; skipping')
while True:
time.sleep(100000)
worker = ManifestBackfillWorker()
worker = TagBackfillWorker()
worker.start()

View file

@ -4,11 +4,11 @@ from data.database import (TagManifestLabelMap, TagManifestToManifest, Manifest,
ManifestLegacyImage, ManifestLabel, TagManifest, RepositoryTag, Image,
TagManifestLabel, Tag, TagToRepositoryTag)
from image.docker.schema1 import DockerSchema1ManifestBuilder
from workers.manifestbackfillworker import backfill_manifest
from workers.labelbackfillworker import backfill_label
from workers.tagbackfillworker import backfill_tag, _backfill_manifest
from test.fixtures import *
@pytest.fixture()
def clear_rows(initialized_db):
# Remove all new-style rows so we can backfill.
@ -22,15 +22,56 @@ def clear_rows(initialized_db):
Manifest.delete().execute()
def test_manifestbackfillworker(clear_rows, initialized_db):
for tag_manifest in TagManifest.select():
# Backfill the manifest.
assert backfill_manifest(tag_manifest)
@pytest.mark.parametrize('clear_all_rows', [
True,
False,
])
def test_tagbackfillworker(clear_all_rows, initialized_db):
# Remove the new-style rows so we can backfill.
TagToRepositoryTag.delete().execute()
Tag.delete().execute()
if clear_all_rows:
TagManifestLabelMap.delete().execute()
ManifestLabel.delete().execute()
ManifestBlob.delete().execute()
ManifestLegacyImage.delete().execute()
TagManifestToManifest.delete().execute()
Manifest.delete().execute()
for repository_tag in list(RepositoryTag.select()):
# Backfill the tag.
assert backfill_tag(repository_tag)
# Ensure if we try again, the backfill is skipped.
assert not backfill_manifest(tag_manifest)
assert not backfill_tag(repository_tag)
# Ensure that we now have the expected tag rows.
tag_to_repo_tag = TagToRepositoryTag.get(repository_tag=repository_tag)
tag = tag_to_repo_tag.tag
assert tag.name == repository_tag.name
assert tag.repository == repository_tag.repository
assert not tag.hidden
assert tag.reversion == repository_tag.reversion
if repository_tag.lifetime_start_ts is None:
assert tag.lifetime_start_ms is None
else:
assert tag.lifetime_start_ms == (repository_tag.lifetime_start_ts * 1000)
if repository_tag.lifetime_end_ts is None:
assert tag.lifetime_end_ms is None
else:
assert tag.lifetime_end_ms == (repository_tag.lifetime_end_ts * 1000)
assert tag.manifest
# Ensure that we now have the expected manifest rows.
try:
tag_manifest = TagManifest.get(tag=repository_tag)
except TagManifest.DoesNotExist:
continue
map_row = TagManifestToManifest.get(tag_manifest=tag_manifest)
assert not map_row.broken
@ -39,6 +80,8 @@ def test_manifestbackfillworker(clear_rows, initialized_db):
assert manifest_row.digest == tag_manifest.digest
assert manifest_row.repository == tag_manifest.tag.repository
assert tag.manifest == map_row.manifest
legacy_image = ManifestLegacyImage.get(manifest=manifest_row).image
assert tag_manifest.tag.image == legacy_image
@ -50,15 +93,12 @@ def test_manifestbackfillworker(clear_rows, initialized_db):
in ManifestBlob.select().where(ManifestBlob.manifest == manifest_row)}
assert expected_storages == found_storages
# Ensure that backfilling labels now works.
for tml in TagManifestLabel.select().where(TagManifestLabel.annotated == tag_manifest):
assert backfill_label(tml)
label_map = TagManifestLabelMap.get(tag_manifest_label=tml)
assert label_map.tag_manifest == tag_manifest
assert label_map.manifest == manifest_row
assert label_map.manifest_label.label == label_map.tag_manifest_label.label
assert label_map.label == tml.label
# Ensure the labels were copied over.
tmls = list(TagManifestLabel.select().where(TagManifestLabel.annotated == tag_manifest))
expected_labels = {tml.label_id for tml in tmls}
found_labels = {m.label_id for m
in ManifestLabel.select().where(ManifestLabel.manifest == manifest_row)}
assert found_labels == expected_labels
def test_manifestbackfillworker_broken_manifest(clear_rows, initialized_db):
@ -71,7 +111,7 @@ def test_manifestbackfillworker_broken_manifest(clear_rows, initialized_db):
tag=RepositoryTag.get())
# Ensure the backfill works.
assert backfill_manifest(broken_manifest)
assert _backfill_manifest(broken_manifest)
# Ensure the mapping is marked as broken.
map_row = TagManifestToManifest.get(tag_manifest=broken_manifest)
@ -106,7 +146,7 @@ def test_manifestbackfillworker_mislinked_manifest(clear_rows, initialized_db):
tag=tag_v50)
# Backfill the manifest and ensure its proper content checksum was linked.
assert backfill_manifest(mislinked_manifest)
assert _backfill_manifest(mislinked_manifest)
map_row = TagManifestToManifest.get(tag_manifest=mislinked_manifest)
assert not map_row.broken
@ -140,7 +180,7 @@ def test_manifestbackfillworker_mislinked_invalid_manifest(clear_rows, initializ
tag=tag_v50)
# Backfill the manifest and ensure it is marked as broken.
assert backfill_manifest(broken_manifest)
assert _backfill_manifest(broken_manifest)
map_row = TagManifestToManifest.get(tag_manifest=broken_manifest)
assert map_row.broken
@ -174,8 +214,8 @@ def test_manifestbackfillworker_repeat_digest(clear_rows, initialized_db):
tag=tag_v50)
# Backfill "both" manifests and ensure both are pointed to by a single resulting row.
assert backfill_manifest(manifest_1)
assert backfill_manifest(manifest_2)
assert _backfill_manifest(manifest_1)
assert _backfill_manifest(manifest_2)
map_row1 = TagManifestToManifest.get(tag_manifest=manifest_1)
map_row2 = TagManifestToManifest.get(tag_manifest=manifest_2)