Merge pull request #3211 from quay/backfill-shared-manifests

Fix TagManifests with shared digests under the same repository.
This commit is contained in:
Joseph Schorr 2018-08-20 15:31:24 -04:00 committed by GitHub
commit b096e793fd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 110 additions and 13 deletions

View file

@ -1435,7 +1435,7 @@ class TagManifest(BaseModel):
class TagManifestToManifest(BaseModel):
""" NOTE: Only used for the duration of the migrations. """
tag_manifest = ForeignKeyField(TagManifest, index=True, unique=True)
manifest = ForeignKeyField(Manifest, index=True, unique=True)
manifest = ForeignKeyField(Manifest, index=True)
broken = BooleanField(index=True, default=False)

View file

@ -0,0 +1,43 @@
"""Remove unique from TagManifestToManifest
Revision ID: 13411de1c0ff
Revises: 654e6df88b71
Create Date: 2018-08-19 23:30:24.969549
"""
# revision identifiers, used by Alembic.
revision = '13411de1c0ff'
down_revision = '654e6df88b71'
from alembic import op
import sqlalchemy as sa
def upgrade(tables, tester):
# Note: Because of a restriction in MySQL, we cannot simply remove the index and re-add
# it without the unique=False, nor can we simply alter the index. To make it work, we'd have to
# remove the primary key on the field, so instead we simply drop the table entirely and
# recreate it with the modified index. The backfill will re-fill this in.
op.drop_table('tagmanifesttomanifest')
op.create_table('tagmanifesttomanifest',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('tag_manifest_id', sa.Integer(), nullable=False),
sa.Column('manifest_id', sa.Integer(), nullable=False),
sa.Column('broken', sa.Boolean(), nullable=False, server_default=sa.sql.expression.false()),
sa.ForeignKeyConstraint(['manifest_id'], ['manifest.id'], name=op.f('fk_tagmanifesttomanifest_manifest_id_manifest')),
sa.ForeignKeyConstraint(['tag_manifest_id'], ['tagmanifest.id'], name=op.f('fk_tagmanifesttomanifest_tag_manifest_id_tagmanifest')),
sa.PrimaryKeyConstraint('id', name=op.f('pk_tagmanifesttomanifest'))
)
op.create_index('tagmanifesttomanifest_broken', 'tagmanifesttomanifest', ['broken'], unique=False)
op.create_index('tagmanifesttomanifest_manifest_id', 'tagmanifesttomanifest', ['manifest_id'], unique=False)
op.create_index('tagmanifesttomanifest_tag_manifest_id', 'tagmanifesttomanifest', ['tag_manifest_id'], unique=True)
tester.populate_table('tagmanifesttomanifest', [
('manifest_id', tester.TestDataType.Foreign('manifest')),
('tag_manifest_id', tester.TestDataType.Foreign('tagmanifest')),
])
def downgrade(tables, tester):
pass

View file

@ -12,9 +12,10 @@ def pre_oci_model(initialized_db):
@pytest.mark.parametrize('names, expected', [
(['unknown'], None),
(['latest'], 'latest'),
(['latest', 'prod'], 'latest'),
(['foo', 'prod'], 'prod'),
(['latest'], {'latest'}),
(['latest', 'prod'], {'latest', 'prod'}),
(['latest', 'prod', 'another'], {'latest', 'prod'}),
(['foo', 'prod'], {'prod'}),
])
def test_find_matching_tag(names, expected, pre_oci_model):
repo = model.repository.get_repository('devtable', 'simple')
@ -23,12 +24,12 @@ def test_find_matching_tag(names, expected, pre_oci_model):
if expected is None:
assert found is None
else:
assert found.name == expected
assert found.name in expected
@pytest.mark.parametrize('repo_namespace, repo_name, expected', [
('devtable', 'simple', 'latest'),
('buynlarge', 'orgrepo', 'latest'),
('devtable', 'simple', {'latest'}),
('buynlarge', 'orgrepo', {'latest', 'prod'}),
])
def test_get_most_recent_tag(repo_namespace, repo_name, expected, pre_oci_model):
repo = model.repository.get_repository(repo_namespace, repo_name)
@ -37,4 +38,4 @@ def test_get_most_recent_tag(repo_namespace, repo_name, expected, pre_oci_model)
if expected is None:
assert found is None
else:
assert found.name == expected
assert found.name in expected

View file

@ -5,7 +5,8 @@ from peewee import JOIN, fn, IntegrityError
from app import app
from data.database import (UseThenDisconnect, TagManifest, TagManifestToManifest, Image,
db_transaction)
Manifest, db_transaction)
from data.model import DataModelException
from data.model.image import get_parent_images
from data.model.tag import populate_manifest
from data.model.blob import get_repo_blob_by_digest, BlobDoesNotExist
@ -129,7 +130,16 @@ def backfill_manifest(tag_manifest):
repository = tag_manifest.tag.repository
image_storage_id_map = {root_image.storage.content_checksum: root_image.storage.id}
parent_images = get_parent_images(repository.namespace_user.username, repository.name, root_image)
try:
parent_images = get_parent_images(repository.namespace_user.username, repository.name,
root_image)
except DataModelException:
logger.exception('Exception when trying to load parent images for manifest `%s`',
tag_manifest.id)
parent_images = {}
is_broken = True
for parent_image in parent_images:
image_storage_id_map[parent_image.storage.content_checksum] = parent_image.storage.id
@ -158,9 +168,22 @@ def backfill_manifest(tag_manifest):
except TagManifest.DoesNotExist:
return True
# Ensure it wasn't already created.
if lookup_map_row(tag_manifest):
return False
# Check for a pre-existing manifest matching the digest in the repository. This can happen
# if we've already created the manifest row (typically for tag reverision).
try:
manifest_row = Manifest.get(digest=manifest.digest, repository=tag_manifest.tag.repository)
except Manifest.DoesNotExist:
# Create the new-style rows for the manifest.
try:
manifest_row = populate_manifest(tag_manifest.tag.repository, manifest,
tag_manifest.tag.image, storage_ids)
except IntegrityError:
# Pre-empted.
return False
# Create the mapping row. If we find another was created for this tag manifest in the
# meantime, then we've been preempted.

View file

@ -138,3 +138,33 @@ def test_manifestbackfillworker_mislinked_invalid_manifest(clear_rows, initializ
manifest_blobs = list(ManifestBlob.select().where(ManifestBlob.manifest == manifest_row))
assert len(manifest_blobs) == 0
def test_manifestbackfillworker_repeat_digest(clear_rows, initialized_db):
""" Tests that a manifest with a shared digest will be properly linked. """
# Delete existing tag manifest so we can reuse the tag.
TagManifestLabel.delete().execute()
TagManifest.delete().execute()
repo = model.repository.get_repository('devtable', 'gargantuan')
tag_v30 = model.tag.get_active_tag('devtable', 'gargantuan', 'v3.0')
tag_v50 = model.tag.get_active_tag('devtable', 'gargantuan', 'v5.0')
# Build a manifest and assign it to both tags (this is allowed in the old model).
builder = DockerSchema1ManifestBuilder('devtable', 'gargantuan', 'sometag')
builder.add_layer('sha256:deadbeef', '{"id": "foo"}')
manifest = builder.build(docker_v2_signing_key)
manifest_1 = TagManifest.create(json_data=manifest.bytes, digest=manifest.digest,
tag=tag_v30)
manifest_2 = TagManifest.create(json_data=manifest.bytes, digest=manifest.digest,
tag=tag_v50)
# Backfill "both" manifests and ensure both are pointed to by a single resulting row.
assert backfill_manifest(manifest_1)
assert backfill_manifest(manifest_2)
map_row1 = TagManifestToManifest.get(tag_manifest=manifest_1)
map_row2 = TagManifestToManifest.get(tag_manifest=manifest_2)
assert map_row1.manifest == map_row2.manifest