A few small improvements to the tag backfill worker

1) Remove the join from the min ID lookup. The join is incredibly slow and taxing on the database, so we now simply use the smallest RepositoryTag ID in the database as the minimum.
2) Increase the worker timeout (WORKER_TIMEOUT) from 600 to 6000.
3) Have the manifest backfill verify the contents of already-backfilled manifests, to ensure we didn't mis-copy bytes due to the previous Unicode issues.
This commit is contained in:
Joseph Schorr 2019-01-14 13:43:42 -05:00
parent defd4b3b20
commit b8d60152c4

View file

@ -25,7 +25,7 @@ from util.migrate.allocator import yield_random_entries
logger = logging.getLogger(__name__)
WORKER_TIMEOUT = 600
WORKER_TIMEOUT = 6000
class BrokenManifest(ManifestInterface):
@ -120,11 +120,7 @@ class TagBackfillWorker(Worker):
.join(TagToRepositoryTag, JOIN.LEFT_OUTER)
.where(TagToRepositoryTag.id >> None, RepositoryTag.hidden == False))
min_id = (RepositoryTag
.select(fn.Min(RepositoryTag.id))
.join(TagToRepositoryTag, JOIN.LEFT_OUTER)
.where(TagToRepositoryTag.id >> None, RepositoryTag.hidden == False)
.scalar())
min_id = (RepositoryTag.select(fn.Min(RepositoryTag.id)).scalar())
max_id = RepositoryTag.select(fn.Max(RepositoryTag.id)).scalar()
iterator = yield_random_entries(
@ -227,7 +223,17 @@ def _get_manifest_id(repositorytag):
return None
try:
return TagManifestToManifest.get(tag_manifest=tag_manifest).manifest_id
found = TagManifestToManifest.get(tag_manifest=tag_manifest).manifest
# Verify that the new-style manifest has the same contents as the old-style manifest.
# If not, update and then return. This is an extra check put in place to ensure unicode
# manifests have been correctly copied.
if found.manifest_bytes != tag_manifest.json_data:
logger.warning('Fixing manifest `%s`', found.id)
found.manifest_bytes = tag_manifest.json_data
found.save()
return found.id
except TagManifestToManifest.DoesNotExist:
# Could not find the new style manifest, so backfill.
_backfill_manifest(tag_manifest)