Add new Manifest, ManifestLabel, ManifestLegacyImage and ManifestBlob tables and start writing and GCing to/from them

This change also starts passing in the manifest interface, rather than the raw data, to the model for writing.

Note that this change does *not* backfill the existing rows in to the new tables; that will occur in a followup PR. The new columns in `tagmanifest` and `tagmanifestlabel` will be used to track the backfill, as it will occur in a worker.
This commit is contained in:
Joseph Schorr 2018-07-31 15:43:09 -04:00
parent 36c7482385
commit a46660a06f
13 changed files with 476 additions and 120 deletions

View file

@ -4,13 +4,15 @@ import time
from mock import patch
from app import storage
from app import storage, docker_v2_signing_key
from contextlib import contextmanager
from playhouse.test_utils import assert_query_count
from data import model, database
from data.database import (Image, ImageStorage, DerivedStorageForImage, Label, TagManifestLabel,
ApprBlob)
ApprBlob, Manifest, TagManifest)
from image.docker.schema1 import DockerSchema1ManifestBuilder
from test.fixtures import *
@ -61,6 +63,20 @@ def create_image(docker_image_id, repository_obj, username):
return image.storage
def store_tag_manifest(namespace, repo_name, tag_name, image_id):
builder = DockerSchema1ManifestBuilder(namespace, repo_name, tag_name)
try:
image_storage = ImageStorage.select().where(~(ImageStorage.content_checksum >> None)).get()
builder.add_layer(image_storage.content_checksum, '{"id": "foo"}')
except ImageStorage.DoesNotExist:
pass
manifest = builder.build(docker_v2_signing_key)
manifest_row, _ = model.tag.store_tag_manifest(namespace, repo_name, tag_name, manifest,
leaf_layer_id=image_id)
return manifest_row
def create_repository(namespace=ADMIN_ACCESS_USER, name=REPO, **kwargs):
user = model.user.get_user(namespace)
repo = model.repository.create_repository(namespace, name, user)
@ -86,8 +102,7 @@ def create_repository(namespace=ADMIN_ACCESS_USER, name=REPO, **kwargs):
parent=parent)
# Set the tag for the image.
tag_manifest, _ = model.tag.store_tag_manifest(namespace, name, tag_name, image_ids[-1],
'sha:someshahere', '{}')
tag_manifest = store_tag_manifest(namespace, name, tag_name, image_ids[-1])
# Add some labels to the tag.
model.label.create_manifest_label(tag_manifest, 'foo', 'bar', 'manifest')
@ -145,6 +160,13 @@ def _get_dangling_label_count():
return len(label_ids - referenced_by_manifest)
def _get_dangling_manifest_count():
manifest_ids = set([current.id for current in Manifest.select()])
referenced_by_tag_manifest = set([manifest.manifest_id for manifest in TagManifest.select()])
return len(manifest_ids - referenced_by_tag_manifest)
@contextmanager
def assert_gc_integrity(expect_storage_removed=True):
""" Specialized assertion for ensuring that GC cleans up all dangling storages
@ -158,15 +180,19 @@ def assert_gc_integrity(expect_storage_removed=True):
# Store the number of dangling storages and labels.
existing_storage_count = _get_dangling_storage_count()
existing_label_count = _get_dangling_label_count()
existing_manifest_count = _get_dangling_manifest_count()
yield
# Ensure the number of dangling storages and labels has not changed.
# Ensure the number of dangling storages, manifests and labels has not changed.
updated_storage_count = _get_dangling_storage_count()
assert updated_storage_count == existing_storage_count
updated_label_count = _get_dangling_label_count()
assert updated_label_count == existing_label_count
updated_manifest_count = _get_dangling_manifest_count()
assert updated_manifest_count == existing_manifest_count
# Ensure that for each call to the image+storage cleanup callback, the image and its
# storage is not found *anywhere* in the database.
for removed_image_and_storage in removed_image_storages:
@ -466,13 +492,11 @@ def test_images_shared_storage(default_tag_policy, initialized_db):
repository=repository, storage=image_storage,
ancestors='/')
model.tag.store_tag_manifest(repository.namespace_user.username, repository.name,
'first', first_image.docker_image_id,
'sha:someshahere', '{}')
store_tag_manifest(repository.namespace_user.username, repository.name,
'first', first_image.docker_image_id)
model.tag.store_tag_manifest(repository.namespace_user.username, repository.name,
'second', second_image.docker_image_id,
'sha:someshahere', '{}')
store_tag_manifest(repository.namespace_user.username, repository.name,
'second', second_image.docker_image_id)
# Delete the first tag.
delete_tag(repository, 'first')
@ -505,9 +529,8 @@ def test_image_with_cas(default_tag_policy, initialized_db):
repository=repository, storage=image_storage,
ancestors='/')
model.tag.store_tag_manifest(repository.namespace_user.username, repository.name,
'first', first_image.docker_image_id,
'sha:someshahere1', '{}')
store_tag_manifest(repository.namespace_user.username, repository.name,
'first', first_image.docker_image_id)
assert_not_deleted(repository, 'i1')
@ -553,13 +576,11 @@ def test_images_shared_cas(default_tag_policy, initialized_db):
repository=repository, storage=is2,
ancestors='/')
model.tag.store_tag_manifest(repository.namespace_user.username, repository.name,
'first', first_image.docker_image_id,
'sha:someshahere1', '{}')
store_tag_manifest(repository.namespace_user.username, repository.name,
'first', first_image.docker_image_id)
model.tag.store_tag_manifest(repository.namespace_user.username, repository.name,
'second', second_image.docker_image_id,
'sha:someshahere2', '{}')
store_tag_manifest(repository.namespace_user.username, repository.name,
'second', second_image.docker_image_id)
assert_not_deleted(repository, 'i1', 'i2')
@ -602,9 +623,8 @@ def test_images_shared_cas_with_new_blob_table(default_tag_policy, initialized_d
repository=repository, storage=is1,
ancestors='/')
model.tag.store_tag_manifest(repository.namespace_user.username, repository.name,
'first', first_image.docker_image_id,
'sha:someshahere1', '{}')
store_tag_manifest(repository.namespace_user.username, repository.name,
'first', first_image.docker_image_id)
assert_not_deleted(repository, 'i1')