diff --git a/workers/tagbackfillworker.py b/workers/tagbackfillworker.py index 42faddf0f..a7115e919 100644 --- a/workers/tagbackfillworker.py +++ b/workers/tagbackfillworker.py @@ -8,11 +8,13 @@ from peewee import JOIN, fn, IntegrityError from app import app from data.database import (UseThenDisconnect, TagToRepositoryTag, RepositoryTag, TagManifestToManifest, Tag, TagManifest, TagManifestToManifest, Image, - Manifest, TagManifestLabel, ManifestLabel, TagManifestLabelMap, db_transaction) + Manifest, TagManifestLabel, ManifestLabel, TagManifestLabelMap, + Repository, db_transaction) from data.model import DataModelException from data.model.image import get_parent_images from data.model.tag import populate_manifest from data.model.blob import get_repo_blob_by_digest, BlobDoesNotExist +from data.model.user import get_namespace_user from data.registry_model import pre_oci_model from data.registry_model.datatypes import Tag as TagDataType from image.docker.schema1 import (DockerSchema1Manifest, ManifestException, ManifestInterface, @@ -109,19 +111,30 @@ class BrokenManifest(ManifestInterface): class TagBackfillWorker(Worker): - def __init__(self): + def __init__(self, namespace_filter=None): super(TagBackfillWorker, self).__init__() + self._namespace_filter = namespace_filter + self.add_operation(self._backfill_tags, WORKER_TIMEOUT) + def _filter(self, query): + if self._namespace_filter: + logger.info('Filtering by namespace `%s`', self._namespace_filter) + namespace_user = get_namespace_user(self._namespace_filter) + query = query.join(Repository).where(Repository.namespace_user == namespace_user) + + return query + def _candidates_to_backfill(self): def missing_tmt_query(): - return (RepositoryTag - .select() + return (self._filter(RepositoryTag.select()) .join(TagToRepositoryTag, JOIN.LEFT_OUTER) .where(TagToRepositoryTag.id >> None, RepositoryTag.hidden == False)) - min_id = (RepositoryTag.select(fn.Min(RepositoryTag.id)).scalar()) - max_id = RepositoryTag.select(fn.Max(RepositoryTag.id)).scalar() + min_id = (self._filter(RepositoryTag.select(fn.Min(RepositoryTag.id))).scalar()) + max_id = self._filter(RepositoryTag.select(fn.Max(RepositoryTag.id))).scalar() + + logger.info('Found candidate range %s-%s', min_id, max_id) iterator = yield_random_entries( missing_tmt_query, @@ -368,5 +381,5 @@ if __name__ == "__main__": while True: time.sleep(100000) - worker = TagBackfillWorker() + worker = TagBackfillWorker(app.config.get('BACKFILL_TAGS_NAMESPACE')) worker.start() diff --git a/workers/test/test_tagbackfillworker.py b/workers/test/test_tagbackfillworker.py index 1a14a0f30..6ee046b28 100644 --- a/workers/test/test_tagbackfillworker.py +++ b/workers/test/test_tagbackfillworker.py @@ -2,7 +2,7 @@ from app import docker_v2_signing_key from data import model from data.database import (TagManifestLabelMap, TagManifestToManifest, Manifest, ManifestBlob, ManifestLegacyImage, ManifestLabel, TagManifest, RepositoryTag, Image, - TagManifestLabel, Tag, TagToRepositoryTag) + TagManifestLabel, Tag, TagToRepositoryTag, Repository) from image.docker.schema1 import DockerSchema1ManifestBuilder from workers.tagbackfillworker import backfill_tag, _backfill_manifest @@ -39,6 +39,8 @@ def test_tagbackfillworker(clear_all_rows, initialized_db): TagManifestToManifest.delete().execute() Manifest.delete().execute() + found_dead_tag = False + for repository_tag in list(RepositoryTag.select()): # Backfill the tag. assert backfill_tag(repository_tag) @@ -63,6 +65,7 @@ def test_tagbackfillworker(clear_all_rows, initialized_db): assert tag.lifetime_end_ms is None else: assert tag.lifetime_end_ms == (repository_tag.lifetime_end_ts * 1000) + found_dead_tag = True assert tag.manifest @@ -100,6 +103,16 @@ def test_tagbackfillworker(clear_all_rows, initialized_db): in ManifestLabel.select().where(ManifestLabel.manifest == manifest_row)} assert found_labels == expected_labels + # Verify at the repository level. + for repository in list(Repository.select()): + tags = RepositoryTag.select().where(RepositoryTag.repository == repository, + RepositoryTag.hidden == False) + oci_tags = Tag.select().where(Tag.repository == repository) + assert len(tags) == len(oci_tags) + assert {t.name for t in tags} == {t.name for t in oci_tags} + + assert found_dead_tag + def test_manifestbackfillworker_broken_manifest(clear_rows, initialized_db): # Delete existing tag manifest so we can reuse the tag.