Add new Manifest, ManifestLabel, ManifestLegacyImage and ManifestBlob tables and start writing and GCing to/from them

This change also starts passing in the manifest interface, rather than the raw data, to the model for writing.

Note that this change does *not* backfill the existing rows in to the new tables; that will occur in a followup PR. The new columns in `tagmanifest` and `tagmanifestlabel` will be used to track the backfill, as it will occur in a worker.
This commit is contained in:
Joseph Schorr 2018-07-31 15:43:09 -04:00
parent 36c7482385
commit a46660a06f
13 changed files with 476 additions and 120 deletions

View file

@ -1,5 +1,4 @@
import logging
import time
from calendar import timegm
from uuid import uuid4
@ -10,7 +9,8 @@ from data.model import (image, db_transaction, DataModelException, _basequery,
config)
from data.database import (RepositoryTag, Repository, Image, ImageStorage, Namespace, TagManifest,
RepositoryNotification, Label, TagManifestLabel, get_epoch_timestamp,
db_for_update)
db_for_update, Manifest, ManifestLabel, ManifestBlob,
ManifestLegacyImage)
from util.timedeltastring import convert_to_timedelta
@ -352,44 +352,64 @@ def _delete_tags(repo, query_modifier=None):
return set()
with db_transaction():
manifests_to_delete = list(TagManifest
.select(TagManifest.id)
.join(RepositoryTag)
.where(RepositoryTag.id << tags_to_delete))
# TODO(jschorr): Update to not use TagManifest once that table has been deprecated.
tag_manifests_to_delete = list(TagManifest
.select()
.join(RepositoryTag)
.where(RepositoryTag.id << tags_to_delete))
tag_manifest_ids_to_delete = [tagmanifest.id for tagmanifest in tag_manifests_to_delete]
manifest_ids_to_delete = [tagmanifest.manifest_id for tagmanifest in tag_manifests_to_delete
if tagmanifest.manifest is not None]
num_deleted_manifests = 0
if len(manifests_to_delete) > 0:
if len(tag_manifest_ids_to_delete) > 0:
# Find the set of IDs for all the labels to delete.
manifest_labels_query = (TagManifestLabel
.select()
.where(TagManifestLabel.repository == repo,
TagManifestLabel.annotated << manifests_to_delete))
TagManifestLabel.annotated << tag_manifest_ids_to_delete))
label_ids = [manifest_label.label_id for manifest_label in manifest_labels_query]
if label_ids:
# Delete all the mapping entries.
(TagManifestLabel
# Delete all the mapping entries for labels.
(TagManifestLabel
.delete()
.where(TagManifestLabel.repository == repo,
TagManifestLabel.annotated << tag_manifest_ids_to_delete)
.execute())
if manifest_ids_to_delete:
(ManifestLabel
.delete()
.where(TagManifestLabel.repository == repo,
TagManifestLabel.annotated << manifests_to_delete)
.where(ManifestLabel.manifest << manifest_ids_to_delete)
.execute())
# Delete the labels themselves.
if label_ids:
# Delete all the matching labels.
Label.delete().where(Label.id << label_ids).execute()
# Delete the old-style manifests.
num_deleted_manifests = (TagManifest
.delete()
.where(TagManifest.id << manifests_to_delete)
.where(TagManifest.id << tag_manifest_ids_to_delete)
.execute())
# Delete the new-style manifests, if any.
if manifest_ids_to_delete:
(ManifestLegacyImage
.delete()
.where(ManifestLegacyImage.manifest << manifest_ids_to_delete)
.execute())
ManifestBlob.delete().where(ManifestBlob.manifest << manifest_ids_to_delete).execute()
Manifest.delete().where(Manifest.id << manifest_ids_to_delete).execute()
num_deleted_tags = (RepositoryTag
.delete()
.where(RepositoryTag.id << tags_to_delete)
.execute())
logger.debug('Removed %s tags with %s manifests', num_deleted_tags, num_deleted_manifests)
ancestors = reduce(lambda r, l: r | l,
(set(tag.image.ancestor_id_list()) for tag in tags_to_delete))
direct_referenced = {tag.image.id for tag in tags_to_delete}
@ -459,14 +479,14 @@ def restore_tag_to_manifest(repo_obj, tag_name, manifest_digest):
# Verify that the manifest digest already existed under this repository under the
# tag.
try:
manifest = (TagManifest
.select(TagManifest, RepositoryTag, Image)
.join(RepositoryTag)
.join(Image)
.where(RepositoryTag.repository == repo_obj)
.where(RepositoryTag.name == tag_name)
.where(TagManifest.digest == manifest_digest)
.get())
tag_manifest = (TagManifest
.select(TagManifest, RepositoryTag, Image)
.join(RepositoryTag)
.join(Image)
.where(RepositoryTag.repository == repo_obj)
.where(RepositoryTag.name == tag_name)
.where(TagManifest.digest == manifest_digest)
.get())
except TagManifest.DoesNotExist:
raise DataModelException('Cannot restore to unknown or invalid digest')
@ -476,9 +496,12 @@ def restore_tag_to_manifest(repo_obj, tag_name, manifest_digest):
except DataModelException:
existing_image = None
docker_image_id = manifest.tag.image.docker_image_id
store_tag_manifest(repo_obj.namespace_user.username, repo_obj.name, tag_name, docker_image_id,
manifest_digest, manifest.json_data, reversion=True)
# Change the tag manifest to point to the updated image.
docker_image_id = tag_manifest.tag.image.docker_image_id
updated_tag = create_or_update_tag_for_repo(repo_obj.id, tag_name, docker_image_id,
reversion=True)
tag_manifest.tag = updated_tag
tag_manifest.save()
return existing_image
@ -509,8 +532,8 @@ def restore_tag_to_image(repo_obj, tag_name, docker_image_id):
return existing_image
def store_tag_manifest(namespace_name, repository_name, tag_name, docker_image_id, manifest_digest,
manifest_data, reversion=False):
def store_tag_manifest(namespace_name, repository_name, tag_name, manifest, leaf_layer_id=None,
reversion=False):
""" Stores a tag manifest for a specific tag name in the database. Returns the TagManifest
object, as well as a boolean indicating whether the TagManifest was created.
"""
@ -519,25 +542,27 @@ def store_tag_manifest(namespace_name, repository_name, tag_name, docker_image_i
except Repository.DoesNotExist:
raise DataModelException('Invalid repository %s/%s' % (namespace_name, repository_name))
return store_tag_manifest_for_repo(repo.id, tag_name, docker_image_id, manifest_digest,
manifest_data, reversion=False)
return store_tag_manifest_for_repo(repo.id, tag_name, manifest, leaf_layer_id=leaf_layer_id,
reversion=False)
def store_tag_manifest_for_repo(repository_id, tag_name, docker_image_id, manifest_digest,
manifest_data, reversion=False):
def store_tag_manifest_for_repo(repository_id, tag_name, manifest, leaf_layer_id=None,
reversion=False):
""" Stores a tag manifest for a specific tag name in the database. Returns the TagManifest
object, as well as a boolean indicating whether the TagManifest was created.
"""
docker_image_id = leaf_layer_id or manifest.leaf_layer_v1_image_id
with db_transaction():
tag = create_or_update_tag_for_repo(repository_id, tag_name, docker_image_id,
reversion=reversion)
try:
manifest = TagManifest.get(digest=manifest_digest)
manifest = TagManifest.get(digest=manifest.digest)
manifest.tag = tag
manifest.save()
return manifest, False
except TagManifest.DoesNotExist:
return TagManifest.create(tag=tag, digest=manifest_digest, json_data=manifest_data), True
return _create_manifest(tag, manifest), True
def get_active_tag(namespace, repo_name, tag_name):
@ -558,10 +583,33 @@ def get_possibly_expired_tag(namespace, repo_name, tag_name):
Namespace.username == namespace)).get()
def associate_generated_tag_manifest(namespace, repo_name, tag_name, manifest_digest,
manifest_data):
def associate_generated_tag_manifest(namespace, repo_name, tag_name, manifest):
tag = get_active_tag(namespace, repo_name, tag_name)
return TagManifest.create(tag=tag, digest=manifest_digest, json_data=manifest_data)
return _create_manifest(tag, manifest)
def _create_manifest(tag, manifest):
# Lookup all blobs in the manifest.
blobs = ImageStorage.select().where(ImageStorage.content_checksum << list(manifest.blob_digests))
blob_map = {}
for blob in blobs:
blob_map[blob.content_checksum] = blob
with db_transaction():
media_type = Manifest.media_type.get_id(manifest.media_type)
manifest_row = Manifest.create(digest=manifest.digest, repository=tag.repository,
manifest_bytes=manifest.bytes, media_type=media_type)
ManifestLegacyImage.create(manifest=manifest_row, repository=tag.repository, image=tag.image)
for index, blob_digest in enumerate(reversed(manifest.blob_digests)):
image_storage = blob_map.get(blob_digest)
if image_storage is None:
raise DataModelException('Missing blob for manifest')
ManifestBlob.create(manifest=manifest_row, repository=tag.repository, blob=image_storage,
blob_index=index)
return TagManifest.create(tag=tag, digest=manifest.digest, json_data=manifest.bytes,
manifest=manifest_row)
def load_tag_manifest(namespace, repo_name, tag_name):