A few small improvements to the tag backfill worker

1) Remove the join on the min ID lookup. This join is incredibly slow and taxing on the database, so we simply set min_id to the minimum RepositoryTag ID in the database.
2) Increase the timeout on the worker.
3) Have the manifest backfill verify the contents of the backfilled manifests (and rewrite them on mismatch), to ensure we didn't mis-copy bytes due to the previous Unicode issues.
This commit is contained in:
Joseph Schorr 2019-01-14 13:43:42 -05:00
parent defd4b3b20
commit b8d60152c4

View file

@ -25,7 +25,7 @@ from util.migrate.allocator import yield_random_entries
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
WORKER_TIMEOUT = 600 WORKER_TIMEOUT = 6000
class BrokenManifest(ManifestInterface): class BrokenManifest(ManifestInterface):
@ -120,11 +120,7 @@ class TagBackfillWorker(Worker):
.join(TagToRepositoryTag, JOIN.LEFT_OUTER) .join(TagToRepositoryTag, JOIN.LEFT_OUTER)
.where(TagToRepositoryTag.id >> None, RepositoryTag.hidden == False)) .where(TagToRepositoryTag.id >> None, RepositoryTag.hidden == False))
min_id = (RepositoryTag min_id = (RepositoryTag.select(fn.Min(RepositoryTag.id)).scalar())
.select(fn.Min(RepositoryTag.id))
.join(TagToRepositoryTag, JOIN.LEFT_OUTER)
.where(TagToRepositoryTag.id >> None, RepositoryTag.hidden == False)
.scalar())
max_id = RepositoryTag.select(fn.Max(RepositoryTag.id)).scalar() max_id = RepositoryTag.select(fn.Max(RepositoryTag.id)).scalar()
iterator = yield_random_entries( iterator = yield_random_entries(
@ -227,7 +223,17 @@ def _get_manifest_id(repositorytag):
return None return None
try: try:
return TagManifestToManifest.get(tag_manifest=tag_manifest).manifest_id found = TagManifestToManifest.get(tag_manifest=tag_manifest).manifest
# Verify that the new-style manifest has the same contents as the old-style manifest.
# If not, update and then return. This is an extra check put in place to ensure unicode
# manifests have been correctly copied.
if found.manifest_bytes != tag_manifest.json_data:
logger.warning('Fixing manifest `%s`', found.id)
found.manifest_bytes = tag_manifest.json_data
found.save()
return found.id
except TagManifestToManifest.DoesNotExist: except TagManifestToManifest.DoesNotExist:
# Could not find the new style manifest, so backfill. # Could not find the new style manifest, so backfill.
_backfill_manifest(tag_manifest) _backfill_manifest(tag_manifest)