From b8d60152c4473386ad5aba48ddc8857fb67beae5 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Mon, 14 Jan 2019 13:43:42 -0500 Subject: [PATCH] A few small improvements to the tag backfill worker 1) Remove the join on the min ID lookup. This join is incredibly slow and taxing on the database, so we simply set the minimum to the min database ID. 2) Increase the timeout on the worker. 3) Have the manifest backfill verify the contents of the backfilled manifests, to ensure we didn't mis-copy bytes due to the previous unicode issues --- workers/tagbackfillworker.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/workers/tagbackfillworker.py b/workers/tagbackfillworker.py index e949b1dc1..42faddf0f 100644 --- a/workers/tagbackfillworker.py +++ b/workers/tagbackfillworker.py @@ -25,7 +25,7 @@ from util.migrate.allocator import yield_random_entries logger = logging.getLogger(__name__) -WORKER_TIMEOUT = 600 +WORKER_TIMEOUT = 6000 class BrokenManifest(ManifestInterface): @@ -120,11 +120,7 @@ class TagBackfillWorker(Worker): .join(TagToRepositoryTag, JOIN.LEFT_OUTER) .where(TagToRepositoryTag.id >> None, RepositoryTag.hidden == False)) - min_id = (RepositoryTag - .select(fn.Min(RepositoryTag.id)) - .join(TagToRepositoryTag, JOIN.LEFT_OUTER) - .where(TagToRepositoryTag.id >> None, RepositoryTag.hidden == False) - .scalar()) + min_id = (RepositoryTag.select(fn.Min(RepositoryTag.id)).scalar()) max_id = RepositoryTag.select(fn.Max(RepositoryTag.id)).scalar() iterator = yield_random_entries( @@ -227,7 +223,17 @@ def _get_manifest_id(repositorytag): return None try: - return TagManifestToManifest.get(tag_manifest=tag_manifest).manifest_id + found = TagManifestToManifest.get(tag_manifest=tag_manifest).manifest + + # Verify that the new-style manifest has the same contents as the old-style manifest. + # If not, update and then return. This is an extra check put in place to ensure unicode + # manifests have been correctly copied. + if found.manifest_bytes != tag_manifest.json_data: + logger.warning('Fixing manifest `%s`', found.id) + found.manifest_bytes = tag_manifest.json_data + found.save() + + return found.id except TagManifestToManifest.DoesNotExist: # Could not find the new style manifest, so backfill. _backfill_manifest(tag_manifest)