Merge pull request #3186 from quay/joseph.schorr/QUAY-1013-1015/manifest-migration

First phase of V2_2 data model migrations
This commit is contained in:
Joseph Schorr 2018-08-06 13:56:14 -04:00 committed by GitHub
commit 7068010751
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
18 changed files with 570 additions and 179 deletions

View file

@ -2,7 +2,8 @@ import logging
from cachetools import lru_cache
from data.database import Label, TagManifestLabel, MediaType, LabelSourceType, db_transaction
from data.database import (Label, TagManifestLabel, MediaType, LabelSourceType, db_transaction,
ManifestLabel)
from data.model import InvalidLabelKeyException, InvalidMediaTypeException, DataModelException
from data.text import prefix_search
from util.validation import validate_label_key
@ -70,6 +71,9 @@ def create_manifest_label(tag_manifest, key, value, source_type_name, media_type
label = Label.create(key=key, value=value, source_type=source_type_id, media_type=media_type_id)
TagManifestLabel.create(annotated=tag_manifest, label=label,
repository=tag_manifest.tag.repository)
if tag_manifest.manifest is not None:
ManifestLabel.create(manifest=tag_manifest.manifest, label=label,
repository=tag_manifest.tag.repository)
return label
@ -115,11 +119,14 @@ def delete_manifest_label(label_uuid, tag_manifest):
if not label.source_type.mutable:
raise DataModelException('Cannot delete immutable label')
# Delete the mapping record and label.
# Delete the mapping records and label.
deleted_count = TagManifestLabel.delete().where(TagManifestLabel.label == label).execute()
if deleted_count != 1:
logger.warning('More than a single label deleted for matching label %s', label_uuid)
deleted_count = ManifestLabel.delete().where(ManifestLabel.label == label).execute()
if deleted_count != 1:
logger.warning('More than a single label deleted for matching label %s', label_uuid)
label.delete_instance(recursive=False)
return label

View file

@ -10,9 +10,10 @@ from data.model import (
config, DataModelException, tag, db_transaction, storage, permission, _basequery)
from data.database import (
Repository, Namespace, RepositoryTag, Star, Image, ImageStorage, User, Visibility,
RepositoryPermission, RepositoryActionCount, Role, RepositoryAuthorizedEmail, TagManifest,
DerivedStorageForImage, Label, TagManifestLabel, db_for_update, get_epoch_timestamp,
db_random_func, db_concat_func, RepositorySearchScore, RepositoryKind, ApprTag)
RepositoryPermission, RepositoryActionCount, Role, RepositoryAuthorizedEmail,
DerivedStorageForImage, Label, db_for_update, get_epoch_timestamp,
db_random_func, db_concat_func, RepositorySearchScore, RepositoryKind, ApprTag,
ManifestLegacyImage, Manifest)
from data.text import prefix_search
from util.itertoolrecipes import take
@ -275,6 +276,13 @@ def garbage_collect_repo(repo, extra_candidate_set=None, is_purge=False):
logger.info('Could not GC derived images %s; will try again soon', image_ids_to_remove)
return False
# Delete any legacy references to the images.
(ManifestLegacyImage
.delete()
.where(ManifestLegacyImage.image << image_ids_to_remove)
.execute())
# Delete the images themselves.
try:
Image.delete().where(Image.id << image_ids_to_remove).execute()
except IntegrityError:

View file

@ -9,7 +9,7 @@ from data.model import (config, db_transaction, InvalidImageException, TorrentIn
from data.database import (ImageStorage, Image, ImageStoragePlacement, ImageStorageLocation,
ImageStorageTransformation, ImageStorageSignature,
ImageStorageSignatureKind, Repository, Namespace, TorrentInfo, ApprBlob,
ensure_under_transaction)
ensure_under_transaction, ManifestBlob)
logger = logging.getLogger(__name__)
@ -161,6 +161,12 @@ def garbage_collect_storage(storage_id_whitelist):
.execute())
logger.debug('Removed %s image storage signatures', signatures_removed)
blob_refs_removed = (ManifestBlob
.delete()
.where(ManifestBlob.blob << orphaned_storage_ids)
.execute())
logger.debug('Removed %s blob references', blob_refs_removed)
storages_removed = (ImageStorage
.delete()
.where(ImageStorage.id << orphaned_storage_ids)

View file

@ -1,5 +1,4 @@
import logging
import time
from calendar import timegm
from uuid import uuid4
@ -10,7 +9,8 @@ from data.model import (image, db_transaction, DataModelException, _basequery,
config)
from data.database import (RepositoryTag, Repository, Image, ImageStorage, Namespace, TagManifest,
RepositoryNotification, Label, TagManifestLabel, get_epoch_timestamp,
db_for_update)
db_for_update, Manifest, ManifestLabel, ManifestBlob,
ManifestLegacyImage)
from util.timedeltastring import convert_to_timedelta
@ -352,44 +352,64 @@ def _delete_tags(repo, query_modifier=None):
return set()
with db_transaction():
manifests_to_delete = list(TagManifest
.select(TagManifest.id)
.join(RepositoryTag)
.where(RepositoryTag.id << tags_to_delete))
# TODO(jschorr): Update to not use TagManifest once that table has been deprecated.
tag_manifests_to_delete = list(TagManifest
.select()
.join(RepositoryTag)
.where(RepositoryTag.id << tags_to_delete))
tag_manifest_ids_to_delete = [tagmanifest.id for tagmanifest in tag_manifests_to_delete]
manifest_ids_to_delete = [tagmanifest.manifest_id for tagmanifest in tag_manifests_to_delete
if tagmanifest.manifest is not None]
num_deleted_manifests = 0
if len(manifests_to_delete) > 0:
if len(tag_manifest_ids_to_delete) > 0:
# Find the set of IDs for all the labels to delete.
manifest_labels_query = (TagManifestLabel
.select()
.where(TagManifestLabel.repository == repo,
TagManifestLabel.annotated << manifests_to_delete))
TagManifestLabel.annotated << tag_manifest_ids_to_delete))
label_ids = [manifest_label.label_id for manifest_label in manifest_labels_query]
if label_ids:
# Delete all the mapping entries.
(TagManifestLabel
# Delete all the mapping entries for labels.
(TagManifestLabel
.delete()
.where(TagManifestLabel.repository == repo,
TagManifestLabel.annotated << tag_manifest_ids_to_delete)
.execute())
if manifest_ids_to_delete:
(ManifestLabel
.delete()
.where(TagManifestLabel.repository == repo,
TagManifestLabel.annotated << manifests_to_delete)
.where(ManifestLabel.manifest << manifest_ids_to_delete)
.execute())
# Delete the labels themselves.
if label_ids:
# Delete all the matching labels.
Label.delete().where(Label.id << label_ids).execute()
# Delete the old-style manifests.
num_deleted_manifests = (TagManifest
.delete()
.where(TagManifest.id << manifests_to_delete)
.where(TagManifest.id << tag_manifest_ids_to_delete)
.execute())
# Delete the new-style manifests, if any.
if manifest_ids_to_delete:
(ManifestLegacyImage
.delete()
.where(ManifestLegacyImage.manifest << manifest_ids_to_delete)
.execute())
ManifestBlob.delete().where(ManifestBlob.manifest << manifest_ids_to_delete).execute()
Manifest.delete().where(Manifest.id << manifest_ids_to_delete).execute()
num_deleted_tags = (RepositoryTag
.delete()
.where(RepositoryTag.id << tags_to_delete)
.execute())
logger.debug('Removed %s tags with %s manifests', num_deleted_tags, num_deleted_manifests)
ancestors = reduce(lambda r, l: r | l,
(set(tag.image.ancestor_id_list()) for tag in tags_to_delete))
direct_referenced = {tag.image.id for tag in tags_to_delete}
@ -459,14 +479,14 @@ def restore_tag_to_manifest(repo_obj, tag_name, manifest_digest):
# Verify that the manifest digest already existed under this repository under the
# tag.
try:
manifest = (TagManifest
.select(TagManifest, RepositoryTag, Image)
.join(RepositoryTag)
.join(Image)
.where(RepositoryTag.repository == repo_obj)
.where(RepositoryTag.name == tag_name)
.where(TagManifest.digest == manifest_digest)
.get())
tag_manifest = (TagManifest
.select(TagManifest, RepositoryTag, Image)
.join(RepositoryTag)
.join(Image)
.where(RepositoryTag.repository == repo_obj)
.where(RepositoryTag.name == tag_name)
.where(TagManifest.digest == manifest_digest)
.get())
except TagManifest.DoesNotExist:
raise DataModelException('Cannot restore to unknown or invalid digest')
@ -476,9 +496,12 @@ def restore_tag_to_manifest(repo_obj, tag_name, manifest_digest):
except DataModelException:
existing_image = None
docker_image_id = manifest.tag.image.docker_image_id
store_tag_manifest(repo_obj.namespace_user.username, repo_obj.name, tag_name, docker_image_id,
manifest_digest, manifest.json_data, reversion=True)
# Change the tag manifest to point to the updated image.
docker_image_id = tag_manifest.tag.image.docker_image_id
updated_tag = create_or_update_tag_for_repo(repo_obj.id, tag_name, docker_image_id,
reversion=True)
tag_manifest.tag = updated_tag
tag_manifest.save()
return existing_image
@ -509,8 +532,8 @@ def restore_tag_to_image(repo_obj, tag_name, docker_image_id):
return existing_image
def store_tag_manifest(namespace_name, repository_name, tag_name, docker_image_id, manifest_digest,
manifest_data, reversion=False):
def store_tag_manifest(namespace_name, repository_name, tag_name, manifest, leaf_layer_id=None,
reversion=False):
""" Stores a tag manifest for a specific tag name in the database. Returns the TagManifest
object, as well as a boolean indicating whether the TagManifest was created.
"""
@ -519,25 +542,27 @@ def store_tag_manifest(namespace_name, repository_name, tag_name, docker_image_i
except Repository.DoesNotExist:
raise DataModelException('Invalid repository %s/%s' % (namespace_name, repository_name))
return store_tag_manifest_for_repo(repo.id, tag_name, docker_image_id, manifest_digest,
manifest_data, reversion=False)
return store_tag_manifest_for_repo(repo.id, tag_name, manifest, leaf_layer_id=leaf_layer_id,
reversion=False)
def store_tag_manifest_for_repo(repository_id, tag_name, docker_image_id, manifest_digest,
manifest_data, reversion=False):
def store_tag_manifest_for_repo(repository_id, tag_name, manifest, leaf_layer_id=None,
reversion=False):
""" Stores a tag manifest for a specific tag name in the database. Returns the TagManifest
object, as well as a boolean indicating whether the TagManifest was created.
"""
docker_image_id = leaf_layer_id or manifest.leaf_layer_v1_image_id
with db_transaction():
tag = create_or_update_tag_for_repo(repository_id, tag_name, docker_image_id,
reversion=reversion)
try:
manifest = TagManifest.get(digest=manifest_digest)
manifest = TagManifest.get(digest=manifest.digest)
manifest.tag = tag
manifest.save()
return manifest, False
except TagManifest.DoesNotExist:
return TagManifest.create(tag=tag, digest=manifest_digest, json_data=manifest_data), True
return _create_manifest(tag, manifest), True
def get_active_tag(namespace, repo_name, tag_name):
@ -558,10 +583,33 @@ def get_possibly_expired_tag(namespace, repo_name, tag_name):
Namespace.username == namespace)).get()
def associate_generated_tag_manifest(namespace, repo_name, tag_name, manifest_digest,
manifest_data):
def associate_generated_tag_manifest(namespace, repo_name, tag_name, manifest):
tag = get_active_tag(namespace, repo_name, tag_name)
return TagManifest.create(tag=tag, digest=manifest_digest, json_data=manifest_data)
return _create_manifest(tag, manifest)
def _create_manifest(tag, manifest):
    """ Writes the new-style manifest rows for `manifest` and then the legacy TagManifest row.

        Creates a Manifest row in the tag's repository, a ManifestLegacyImage row linking it
        to `tag.image`, and one ManifestBlob row per entry of `manifest.blob_digests`
        (walked in reverse, with `blob_index` counting from zero). Returns the newly created
        TagManifest, which references both `tag` and the new Manifest row.

        Raises DataModelException if a blob digest has no ImageStorage row with a matching
        `content_checksum`.
    """
    # Resolve all referenced blobs up front, keyed by their content checksum.
    matching_storages = (ImageStorage
                         .select()
                         .where(ImageStorage.content_checksum << list(manifest.blob_digests)))
    storage_by_checksum = {row.content_checksum: row for row in matching_storages}

    with db_transaction():
        media_type_id = Manifest.media_type.get_id(manifest.media_type)
        created_manifest = Manifest.create(digest=manifest.digest,
                                           repository=tag.repository,
                                           manifest_bytes=manifest.bytes,
                                           media_type=media_type_id)

        # Keep the link from the new manifest back to the legacy image the tag points at.
        ManifestLegacyImage.create(manifest=created_manifest,
                                   repository=tag.repository,
                                   image=tag.image)

        for position, digest in enumerate(reversed(manifest.blob_digests)):
            if digest not in storage_by_checksum:
                raise DataModelException('Missing blob for manifest')

            ManifestBlob.create(manifest=created_manifest,
                                repository=tag.repository,
                                blob=storage_by_checksum[digest],
                                blob_index=position)

        return TagManifest.create(tag=tag,
                                  digest=manifest.digest,
                                  json_data=manifest.bytes,
                                  manifest=created_manifest)
def load_tag_manifest(namespace, repo_name, tag_name):

View file

@ -89,6 +89,9 @@ def test_filter_repositories(username, include_public, filter_to_namespace, repo
.switch(Repository)
.join(RepositoryPermission, JOIN.LEFT_OUTER))
# Prime the cache.
Repository.kind.get_id('image')
with assert_query_count(1):
found = list(filter_to_repos_for_user(query, user.id,
namespace=namespace,

View file

@ -4,13 +4,15 @@ import time
from mock import patch
from app import storage
from app import storage, docker_v2_signing_key
from contextlib import contextmanager
from playhouse.test_utils import assert_query_count
from data import model, database
from data.database import (Image, ImageStorage, DerivedStorageForImage, Label, TagManifestLabel,
ApprBlob)
ApprBlob, Manifest, TagManifest)
from image.docker.schema1 import DockerSchema1ManifestBuilder
from test.fixtures import *
@ -61,6 +63,20 @@ def create_image(docker_image_id, repository_obj, username):
return image.storage
def store_tag_manifest(namespace, repo_name, tag_name, image_id):
    """ Test helper: builds a signed schema1 manifest for the tag and stores it through
        `model.tag.store_tag_manifest`, using `image_id` as the leaf layer ID.

        If any ImageStorage row has a non-null content checksum, one such row is added to
        the manifest as a layer; otherwise the manifest is built with no layers.
        Returns only the stored row, dropping the "was created" flag.
    """
    manifest_builder = DockerSchema1ManifestBuilder(namespace, repo_name, tag_name)

    try:
        checksummed = ImageStorage.select().where(~(ImageStorage.content_checksum >> None)).get()
    except ImageStorage.DoesNotExist:
        # No storage with a checksum exists; build the manifest without layers.
        pass
    else:
        manifest_builder.add_layer(checksummed.content_checksum, '{"id": "foo"}')

    signed_manifest = manifest_builder.build(docker_v2_signing_key)
    stored_row, _ = model.tag.store_tag_manifest(namespace, repo_name, tag_name, signed_manifest,
                                                 leaf_layer_id=image_id)
    return stored_row
def create_repository(namespace=ADMIN_ACCESS_USER, name=REPO, **kwargs):
user = model.user.get_user(namespace)
repo = model.repository.create_repository(namespace, name, user)
@ -86,8 +102,7 @@ def create_repository(namespace=ADMIN_ACCESS_USER, name=REPO, **kwargs):
parent=parent)
# Set the tag for the image.
tag_manifest, _ = model.tag.store_tag_manifest(namespace, name, tag_name, image_ids[-1],
'sha:someshahere', '{}')
tag_manifest = store_tag_manifest(namespace, name, tag_name, image_ids[-1])
# Add some labels to the tag.
model.label.create_manifest_label(tag_manifest, 'foo', 'bar', 'manifest')
@ -145,6 +160,13 @@ def _get_dangling_label_count():
return len(label_ids - referenced_by_manifest)
def _get_dangling_manifest_count():
    """ Returns how many Manifest rows are not referenced by any TagManifest row. """
    all_manifest_ids = {row.id for row in Manifest.select()}
    referenced_ids = {tag_manifest.manifest_id for tag_manifest in TagManifest.select()}
    return len(all_manifest_ids - referenced_ids)
@contextmanager
def assert_gc_integrity(expect_storage_removed=True):
""" Specialized assertion for ensuring that GC cleans up all dangling storages
@ -158,15 +180,19 @@ def assert_gc_integrity(expect_storage_removed=True):
# Store the number of dangling storages and labels.
existing_storage_count = _get_dangling_storage_count()
existing_label_count = _get_dangling_label_count()
existing_manifest_count = _get_dangling_manifest_count()
yield
# Ensure the number of dangling storages and labels has not changed.
# Ensure the number of dangling storages, manifests and labels has not changed.
updated_storage_count = _get_dangling_storage_count()
assert updated_storage_count == existing_storage_count
updated_label_count = _get_dangling_label_count()
assert updated_label_count == existing_label_count
updated_manifest_count = _get_dangling_manifest_count()
assert updated_manifest_count == existing_manifest_count
# Ensure that for each call to the image+storage cleanup callback, the image and its
# storage is not found *anywhere* in the database.
for removed_image_and_storage in removed_image_storages:
@ -466,13 +492,11 @@ def test_images_shared_storage(default_tag_policy, initialized_db):
repository=repository, storage=image_storage,
ancestors='/')
model.tag.store_tag_manifest(repository.namespace_user.username, repository.name,
'first', first_image.docker_image_id,
'sha:someshahere', '{}')
store_tag_manifest(repository.namespace_user.username, repository.name,
'first', first_image.docker_image_id)
model.tag.store_tag_manifest(repository.namespace_user.username, repository.name,
'second', second_image.docker_image_id,
'sha:someshahere', '{}')
store_tag_manifest(repository.namespace_user.username, repository.name,
'second', second_image.docker_image_id)
# Delete the first tag.
delete_tag(repository, 'first')
@ -505,9 +529,8 @@ def test_image_with_cas(default_tag_policy, initialized_db):
repository=repository, storage=image_storage,
ancestors='/')
model.tag.store_tag_manifest(repository.namespace_user.username, repository.name,
'first', first_image.docker_image_id,
'sha:someshahere1', '{}')
store_tag_manifest(repository.namespace_user.username, repository.name,
'first', first_image.docker_image_id)
assert_not_deleted(repository, 'i1')
@ -553,13 +576,11 @@ def test_images_shared_cas(default_tag_policy, initialized_db):
repository=repository, storage=is2,
ancestors='/')
model.tag.store_tag_manifest(repository.namespace_user.username, repository.name,
'first', first_image.docker_image_id,
'sha:someshahere1', '{}')
store_tag_manifest(repository.namespace_user.username, repository.name,
'first', first_image.docker_image_id)
model.tag.store_tag_manifest(repository.namespace_user.username, repository.name,
'second', second_image.docker_image_id,
'sha:someshahere2', '{}')
store_tag_manifest(repository.namespace_user.username, repository.name,
'second', second_image.docker_image_id)
assert_not_deleted(repository, 'i1', 'i2')
@ -602,9 +623,8 @@ def test_images_shared_cas_with_new_blob_table(default_tag_policy, initialized_d
repository=repository, storage=is1,
ancestors='/')
model.tag.store_tag_manifest(repository.namespace_user.username, repository.name,
'first', first_image.docker_image_id,
'sha:someshahere1', '{}')
store_tag_manifest(repository.namespace_user.username, repository.name,
'first', first_image.docker_image_id)
assert_not_deleted(repository, 'i1')

View file

@ -1,19 +1,26 @@
import pytest
import json
from datetime import datetime
from mock import patch
from time import time
from data.database import Image, RepositoryTag, ImageStorage, Repository
import pytest
from mock import patch
from app import docker_v2_signing_key
from data.database import (Image, RepositoryTag, ImageStorage, Repository, Manifest, ManifestBlob,
ManifestLegacyImage)
from data.model.repository import create_repository
from data.model.tag import (list_active_repo_tags, create_or_update_tag, delete_tag,
get_matching_tags, _tag_alive, get_matching_tags_for_images,
change_tag_expiration, get_active_tag)
change_tag_expiration, get_active_tag, store_tag_manifest)
from data.model.image import find_create_or_link_image
from image.docker.schema1 import DockerSchema1ManifestBuilder
from util.timedeltastring import convert_to_timedelta
from test.fixtures import *
def _get_expected_tags(image):
expected_query = (RepositoryTag
.select()
@ -211,3 +218,31 @@ def test_change_tag_expiration(expiration_offset, expected_offset, initialized_d
end_date = datetime.utcfromtimestamp(footag_updated.lifetime_end_ts)
expected_end_date = start_date + convert_to_timedelta(expected_offset)
assert (expected_end_date - end_date).total_seconds() < 5 # variance in test
def test_store_tag_manifest(initialized_db):
    """ Storing a tag manifest must also write the new-model Manifest, ManifestBlob and
        ManifestLegacyImage rows.
    """
    # Build a manifest whose layers reference up to ten existing checksummed storages.
    manifest_builder = DockerSchema1ManifestBuilder('devtable', 'simple', 'sometag')
    checksummed_storages = list(ImageStorage
                                .select()
                                .where(~(ImageStorage.content_checksum >> None))
                                .limit(10))
    assert checksummed_storages

    repository = model.repository.get_repository('devtable', 'simple')
    for position, layer_storage in enumerate(checksummed_storages):
        layer_image_id = 'someimage%s' % position
        manifest_builder.add_layer(layer_storage.content_checksum,
                                   json.dumps({'id': layer_image_id}))
        find_create_or_link_image(layer_image_id, repository, 'devtable', {}, 'local_us')

    built_manifest = manifest_builder.build(docker_v2_signing_key)
    stored, _ = store_tag_manifest('devtable', 'simple', 'sometag', built_manifest)

    # The legacy row must point at a new-style manifest carrying the same bytes and digest.
    new_manifest = stored.manifest
    assert new_manifest is not None
    assert stored.manifest.manifest_bytes == built_manifest.bytes
    assert stored.manifest.digest == str(built_manifest.digest)

    # Each layer storage must be linked to the manifest, and the legacy image must match
    # the image the tag points at.
    linked_blob_ids = {blob_row.blob_id for blob_row in
                       ManifestBlob.select().where(ManifestBlob.manifest == stored.manifest)}
    assert linked_blob_ids == {layer_storage.id for layer_storage in checksummed_storages}

    assert ManifestLegacyImage.get(manifest=stored.manifest).image == stored.tag.image