Fix TagManifests with shared digests under the same repository.

TagManifests can (apparently, in very rare scenarios) share manifests with the exact same digests, so we need to support that case in the backfill worker. We also need to remove a unique constraint on the manifest column in the mapping table to support this case.
This commit is contained in:
Joseph Schorr 2018-08-19 23:50:18 -04:00
parent 8a83bf7c81
commit e30b746aef
4 changed files with 91 additions and 5 deletions

View file

@ -1435,7 +1435,7 @@ class TagManifest(BaseModel):
class TagManifestToManifest(BaseModel): class TagManifestToManifest(BaseModel):
""" NOTE: Only used for the duration of the migrations. """ """ NOTE: Only used for the duration of the migrations. """
tag_manifest = ForeignKeyField(TagManifest, index=True, unique=True) tag_manifest = ForeignKeyField(TagManifest, index=True, unique=True)
manifest = ForeignKeyField(Manifest, index=True, unique=True) manifest = ForeignKeyField(Manifest, index=True)
broken = BooleanField(index=True, default=False) broken = BooleanField(index=True, default=False)

View file

@ -0,0 +1,43 @@
"""Remove unique from TagManifestToManifest
Revision ID: 13411de1c0ff
Revises: 654e6df88b71
Create Date: 2018-08-19 23:30:24.969549
"""
# revision identifiers, used by Alembic.
revision = '13411de1c0ff'
down_revision = '654e6df88b71'
from alembic import op
import sqlalchemy as sa
def upgrade(tables, tester):
# Note: Because of a restriction in MySQL, we cannot simply remove the index and re-add
# it without the unique=False, nor can we simply alter the index. To make it work, we'd have to
# remove the primary key on the field, so instead we simply drop the table entirely and
# recreate it with the modified index. The backfill will re-fill this in.
op.drop_table('tagmanifesttomanifest')
op.create_table('tagmanifesttomanifest',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('tag_manifest_id', sa.Integer(), nullable=False),
sa.Column('manifest_id', sa.Integer(), nullable=False),
sa.Column('broken', sa.Boolean(), nullable=False, server_default=sa.sql.expression.false()),
sa.ForeignKeyConstraint(['manifest_id'], ['manifest.id'], name=op.f('fk_tagmanifesttomanifest_manifest_id_manifest')),
sa.ForeignKeyConstraint(['tag_manifest_id'], ['tagmanifest.id'], name=op.f('fk_tagmanifesttomanifest_tag_manifest_id_tagmanifest')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_tagmanifesttomanifest'))
)
op.create_index('tagmanifesttomanifest_broken', 'tagmanifesttomanifest', ['broken'], unique=False)
op.create_index('tagmanifesttomanifest_manifest_id', 'tagmanifesttomanifest', ['manifest_id'], unique=False)
op.create_index('tagmanifesttomanifest_tag_manifest_id', 'tagmanifesttomanifest', ['tag_manifest_id'], unique=True)
tester.populate_table('tagmanifesttomanifest', [
('manifest_id', tester.TestDataType.Foreign('manifest')),
('tag_manifest_id', tester.TestDataType.Foreign('tagmanifest')),
])
def downgrade(tables, tester):
pass

View file

@ -5,7 +5,7 @@ from peewee import JOIN, fn, IntegrityError
from app import app from app import app
from data.database import (UseThenDisconnect, TagManifest, TagManifestToManifest, Image, from data.database import (UseThenDisconnect, TagManifest, TagManifestToManifest, Image,
db_transaction) Manifest, db_transaction)
from data.model.image import get_parent_images from data.model.image import get_parent_images
from data.model.tag import populate_manifest from data.model.tag import populate_manifest
from data.model.blob import get_repo_blob_by_digest, BlobDoesNotExist from data.model.blob import get_repo_blob_by_digest, BlobDoesNotExist
@ -158,9 +158,22 @@ def backfill_manifest(tag_manifest):
except TagManifest.DoesNotExist: except TagManifest.DoesNotExist:
return True return True
# Create the new-style rows for the manifest. # Ensure it wasn't already created.
manifest_row = populate_manifest(tag_manifest.tag.repository, manifest, if lookup_map_row(tag_manifest):
tag_manifest.tag.image, storage_ids) return False
# Check for a pre-existing manifest matching the digest in the repository. This can happen
# if we've already created the manifest row (typically for tag reverision).
try:
manifest_row = Manifest.get(digest=manifest.digest, repository=tag_manifest.tag.repository)
except Manifest.DoesNotExist:
# Create the new-style rows for the manifest.
try:
manifest_row = populate_manifest(tag_manifest.tag.repository, manifest,
tag_manifest.tag.image, storage_ids)
except IntegrityError:
# Pre-empted.
return False
# Create the mapping row. If we find another was created for this tag manifest in the # Create the mapping row. If we find another was created for this tag manifest in the
# meantime, then we've been preempted. # meantime, then we've been preempted.

View file

@ -138,3 +138,33 @@ def test_manifestbackfillworker_mislinked_invalid_manifest(clear_rows, initializ
manifest_blobs = list(ManifestBlob.select().where(ManifestBlob.manifest == manifest_row)) manifest_blobs = list(ManifestBlob.select().where(ManifestBlob.manifest == manifest_row))
assert len(manifest_blobs) == 0 assert len(manifest_blobs) == 0
def test_manifestbackfillworker_repeat_digest(clear_rows, initialized_db):
""" Tests that a manifest with a shared digest will be properly linked. """
# Delete existing tag manifest so we can reuse the tag.
TagManifestLabel.delete().execute()
TagManifest.delete().execute()
repo = model.repository.get_repository('devtable', 'gargantuan')
tag_v30 = model.tag.get_active_tag('devtable', 'gargantuan', 'v3.0')
tag_v50 = model.tag.get_active_tag('devtable', 'gargantuan', 'v5.0')
# Build a manifest and assign it to both tags (this is allowed in the old model).
builder = DockerSchema1ManifestBuilder('devtable', 'gargantuan', 'sometag')
builder.add_layer('sha256:deadbeef', '{"id": "foo"}')
manifest = builder.build(docker_v2_signing_key)
manifest_1 = TagManifest.create(json_data=manifest.bytes, digest=manifest.digest,
tag=tag_v30)
manifest_2 = TagManifest.create(json_data=manifest.bytes, digest=manifest.digest,
tag=tag_v50)
# Backfill "both" manifests and ensure both are pointed to by a single resulting row.
assert backfill_manifest(manifest_1)
assert backfill_manifest(manifest_2)
map_row1 = TagManifestToManifest.get(tag_manifest=manifest_1)
map_row2 = TagManifestToManifest.get(tag_manifest=manifest_2)
assert map_row1.manifest == map_row2.manifest