2018-11-05 18:03:08 +00:00
|
|
|
import logging
|
|
|
|
|
2018-11-12 21:27:49 +00:00
|
|
|
from collections import namedtuple
|
|
|
|
|
2018-11-19 21:31:41 +00:00
|
|
|
from peewee import IntegrityError, JOIN
|
2018-11-05 18:03:08 +00:00
|
|
|
|
2018-11-12 21:27:49 +00:00
|
|
|
from data.database import (Tag, Manifest, ManifestBlob, ManifestLegacyImage, ManifestChild,
|
|
|
|
db_transaction)
|
|
|
|
from data.model import BlobDoesNotExist
|
2019-02-14 17:46:42 +00:00
|
|
|
from data.model.blob import get_or_create_shared_blob, get_shared_blob
|
2018-11-05 18:03:08 +00:00
|
|
|
from data.model.oci.tag import filter_to_alive_tags
|
2018-11-12 21:27:49 +00:00
|
|
|
from data.model.oci.label import create_manifest_label
|
2018-11-19 10:24:04 +00:00
|
|
|
from data.model.oci.retriever import RepositoryContentRetriever
|
|
|
|
from data.model.storage import lookup_repo_storages_by_content_checksum
|
2018-11-05 18:03:08 +00:00
|
|
|
from data.model.image import lookup_repository_images, get_image, synthesize_v1_image
|
2018-11-25 14:16:59 +00:00
|
|
|
from image.docker.schema2 import EMPTY_LAYER_BLOB_DIGEST, EMPTY_LAYER_BYTES
|
2018-11-12 21:27:49 +00:00
|
|
|
from image.docker.schema1 import ManifestException
|
|
|
|
from image.docker.schema2.list import MalformedSchema2ManifestList
|
|
|
|
from util.validation import is_json
|
|
|
|
|
2018-11-05 18:03:08 +00:00
|
|
|
|
|
|
|
# Module-level logger, named after this module, shared by the functions below.
logger = logging.getLogger(__name__)
|
|
|
|
|
2018-11-12 21:27:49 +00:00
|
|
|
# Result of a get-or-create operation on a manifest:
#   manifest        - the Manifest row that was found or created.
#   newly_created   - True only when the row was created by this call.
#   labels_to_apply - dict of labels to act upon for a newly created manifest; None when the
#                     manifest already existed.
CreatedManifest = namedtuple('CreatedManifest', ['manifest', 'newly_created', 'labels_to_apply'])
|
|
|
|
|
|
|
|
|
2018-11-05 18:03:08 +00:00
|
|
|
def lookup_manifest(repository_id, manifest_digest, allow_dead=False):
    """ Looks up the manifest with the given digest in the given repository, returning the
        manifest row or None if no matching manifest is found.

        When allow_dead is False (the default), a manifest is only returned if it is referenced
        by an alive tag, either directly or as the child of a manifest that has an alive tag.
        When allow_dead is True, the tag checks are skipped entirely.
    """
    def _get_or_none(candidates):
        # Collapse the "no such row" exception into a None result.
        try:
            return candidates.get()
        except Manifest.DoesNotExist:
            return None

    matching = (Manifest
                .select()
                .where(Manifest.repository == repository_id)
                .where(Manifest.digest == manifest_digest))

    if allow_dead:
        return _get_or_none(matching)

    # First attempt: the manifest is pointed to directly by an alive tag.
    directly_tagged = _get_or_none(filter_to_alive_tags(matching.join(Tag)))
    if directly_tagged is not None:
        return directly_tagged

    # Second attempt: the manifest is the child of a manifest that has an alive tag.
    via_parent = (matching
                  .join(ManifestChild, on=(ManifestChild.child_manifest == Manifest.id))
                  .join(Tag, on=(Tag.manifest == ManifestChild.manifest)))
    return _get_or_none(filter_to_alive_tags(via_parent))
|
|
|
|
|
|
|
|
|
2018-11-12 21:27:49 +00:00
|
|
|
def get_or_create_manifest(repository_id, manifest_interface_instance, storage):
    """ Looks up the manifest matching the digest of the given parsed manifest in the repository
        and, if it is not present, creates it. Returns a CreatedManifest describing the row, or
        None if the manifest could not be created.

        An already-existing manifest is returned even if it is only referenced by dead tags; in
        that case `newly_created` is False and `labels_to_apply` is None.

        Note that *all* blobs referenced by the manifest must already exist in the repository,
        otherwise creation fails and None is returned.
    """
    digest = manifest_interface_instance.digest
    already_present = lookup_manifest(repository_id, digest, allow_dead=True)
    if already_present is None:
        return _create_manifest(repository_id, manifest_interface_instance, storage)

    return CreatedManifest(manifest=already_present, newly_created=False, labels_to_apply=None)
|
|
|
|
|
|
|
|
|
|
|
|
def _create_manifest(repository_id, manifest_interface_instance, storage):
    """ Validates the given parsed manifest and creates its row (plus its blob, legacy image,
        child manifest and label rows) under the specified repository.

        Returns a CreatedManifest on success. If another writer created the same manifest first,
        the existing row is returned with `newly_created=False` and `labels_to_apply=None`.
        Returns None if validation fails, a child manifest cannot be loaded or created, a
        referenced blob is missing, the legacy image cannot be populated, or the manifest row
        can neither be created nor found.
    """
    # Validate the manifest.
    retriever = RepositoryContentRetriever.for_repository(repository_id, storage)
    try:
        manifest_interface_instance.validate(retriever)
    except (ManifestException, MalformedSchema2ManifestList, BlobDoesNotExist, IOError):
        logger.exception('Could not validate manifest `%s`', manifest_interface_instance.digest)
        return None

    # Load, parse and get/create the child manifests, if any.
    child_manifest_refs = manifest_interface_instance.child_manifests(retriever)
    child_manifest_rows = {}
    child_manifest_label_dicts = []

    if child_manifest_refs is not None:
        for child_manifest_ref in child_manifest_refs:
            # Load and parse the child manifest.
            try:
                child_manifest = child_manifest_ref.manifest_obj
            except (ManifestException, MalformedSchema2ManifestList, BlobDoesNotExist, IOError):
                logger.exception('Could not load manifest list for manifest `%s`',
                                 manifest_interface_instance.digest)
                return None

            # Retrieve its labels.
            labels = child_manifest.get_manifest_labels(retriever)
            if labels is None:
                # No exception is active here, so log a plain error rather than a traceback.
                logger.error('Could not load manifest labels for child manifest')
                return None

            # Get/create the child manifest in the database.
            child_manifest_info = get_or_create_manifest(repository_id, child_manifest, storage)
            if child_manifest_info is None:
                logger.error('Could not get/create child manifest')
                return None

            # Keyed by digest so that a child referenced more than once is only linked once.
            child_manifest_rows[child_manifest_info.manifest.digest] = child_manifest_info.manifest
            child_manifest_label_dicts.append(labels)

    # Ensure all the blobs in the manifest exist.
    digests = set(manifest_interface_instance.local_blob_digests)
    blob_map = {}

    # If the special empty layer is required, simply load it directly. This is much faster
    # than trying to load it on a per repository basis, and that is unnecessary anyway since
    # this layer is predefined.
    if EMPTY_LAYER_BLOB_DIGEST in digests:
        digests.remove(EMPTY_LAYER_BLOB_DIGEST)
        blob_map[EMPTY_LAYER_BLOB_DIGEST] = get_shared_blob(EMPTY_LAYER_BLOB_DIGEST)
        if not blob_map[EMPTY_LAYER_BLOB_DIGEST]:
            logger.warning('Could not find the special empty blob in storage')
            return None

    if digests:
        query = lookup_repo_storages_by_content_checksum(repository_id, digests)
        blob_map.update({s.content_checksum: s for s in query})
        for digest_str in digests:
            if digest_str not in blob_map:
                logger.warning('Unknown blob `%s` under manifest `%s` for repository `%s`',
                               digest_str, manifest_interface_instance.digest, repository_id)
                return None

    # Special check: If the empty layer blob is needed for this manifest, add it to the
    # blob map. This is necessary because Docker decided to elide sending of this special
    # empty layer in schema version 2, but we need to have it referenced for GC and schema
    # version 1.
    if EMPTY_LAYER_BLOB_DIGEST not in blob_map:
        if manifest_interface_instance.get_requires_empty_layer_blob(retriever):
            shared_blob = get_or_create_shared_blob(EMPTY_LAYER_BLOB_DIGEST, EMPTY_LAYER_BYTES,
                                                    storage)
            assert not shared_blob.uploading
            assert shared_blob.content_checksum == EMPTY_LAYER_BLOB_DIGEST
            blob_map[EMPTY_LAYER_BLOB_DIGEST] = shared_blob

    # Determine and populate the legacy image if necessary. Manifest lists will not have a
    # legacy image.
    legacy_image = None
    if manifest_interface_instance.has_legacy_image:
        legacy_image_id = _populate_legacy_image(repository_id, manifest_interface_instance,
                                                 blob_map, retriever)
        if legacy_image_id is None:
            return None

        legacy_image = get_image(repository_id, legacy_image_id)
        if legacy_image is None:
            return None

    # Create the manifest and its blobs.
    media_type = Manifest.media_type.get_id(manifest_interface_instance.media_type)
    storage_ids = {blob_storage.id for blob_storage in blob_map.values()}

    with db_transaction():
        # Create the manifest.
        try:
            manifest = Manifest.create(repository=repository_id,
                                       digest=manifest_interface_instance.digest,
                                       media_type=media_type,
                                       manifest_bytes=manifest_interface_instance.bytes.as_encoded_str())
        except IntegrityError:
            # Most likely another writer created the same manifest concurrently; reuse its row.
            try:
                manifest = Manifest.get(repository=repository_id,
                                        digest=manifest_interface_instance.digest)
            except Manifest.DoesNotExist:
                # The integrity failure was not caused by an existing row for this digest.
                logger.exception('Could not create manifest `%s` under repository `%s`',
                                 manifest_interface_instance.digest, repository_id)
                return None

            return CreatedManifest(manifest=manifest, newly_created=False, labels_to_apply=None)

        # Insert the blobs.
        blobs_to_insert = [dict(manifest=manifest, repository=repository_id, blob=storage_id)
                           for storage_id in storage_ids]
        if blobs_to_insert:
            ManifestBlob.insert_many(blobs_to_insert).execute()

        # Set the legacy image (if applicable).
        if legacy_image is not None:
            ManifestLegacyImage.create(repository=repository_id, image=legacy_image,
                                       manifest=manifest)

        # Insert the manifest child rows (if applicable).
        if child_manifest_rows:
            children_to_insert = [dict(manifest=manifest, child_manifest=child_manifest,
                                       repository=repository_id)
                                  for child_manifest in child_manifest_rows.values()]
            ManifestChild.insert_many(children_to_insert).execute()

        # Define the labels for the manifest (if any).
        labels = manifest_interface_instance.get_manifest_labels(retriever)
        if labels:
            for key, value in labels.iteritems():
                label_media_type = 'application/json' if is_json(value) else 'text/plain'
                create_manifest_label(manifest, key, value, 'manifest', label_media_type)

        # Return the dictionary of labels to apply (i.e. those labels that cause an action to be
        # taken on the manifest or its resulting tags). We only return those labels either
        # defined on the manifest or shared amongst all the child manifests. We intersect
        # amongst all child manifests to ensure that any action performed is defined in all
        # manifests.
        labels_to_apply = labels or {}
        if child_manifest_label_dicts:
            labels_to_apply = child_manifest_label_dicts[0].viewitems()
            for child_manifest_label_dict in child_manifest_label_dicts[1:]:
                # Intersect the key+values of the labels to ensure we get the exact same result
                # for all the child manifests.
                labels_to_apply = labels_to_apply & child_manifest_label_dict.viewitems()

            labels_to_apply = dict(labels_to_apply)

    return CreatedManifest(manifest=manifest, newly_created=True, labels_to_apply=labels_to_apply)
|
|
|
|
|
|
|
|
|
2018-11-19 10:24:04 +00:00
|
|
|
def _populate_legacy_image(repository_id, manifest_interface_instance, blob_map, retriever):
    """ Synthesizes any V1 (legacy) images for the manifest's layers that do not already exist
        in the repository.

        `blob_map` maps blob content checksums to their storage rows; the storage row for each
        layer that needs a synthesized image is looked up in it by the layer's content checksum.

        Returns the Docker image ID of the last legacy layer generated for the manifest, or None
        if a required parent image could not be found, the manifest raised a ManifestException
        while its V1 metadata was being rewritten, or the manifest produced no legacy layers.
    """
    # Lookup all the images and their parent images (if any) inside the manifest.
    # This will let us know which v1 images we need to synthesize and which ones are invalid.
    docker_image_ids = list(manifest_interface_instance.get_legacy_image_ids(retriever))
    images_query = lookup_repository_images(repository_id, docker_image_ids)
    image_storage_map = {i.docker_image_id: i.storage for i in images_query}

    # Rewrite any v1 image IDs that do not match the checksum in the database.
    try:
        rewritten_images = manifest_interface_instance.generate_legacy_layers(image_storage_map,
                                                                              retriever)
        rewritten_images = list(rewritten_images)

        # Images synthesized during this call, keyed by image ID, so that later layers can find
        # their parent without another lookup.
        parent_image_map = {}

        for rewritten_image in rewritten_images:
            if rewritten_image.image_id not in image_storage_map:
                parent_image = None
                if rewritten_image.parent_image_id:
                    parent_image = parent_image_map.get(rewritten_image.parent_image_id)
                    if parent_image is None:
                        parent_image = get_image(repository_id, rewritten_image.parent_image_id)
                        if parent_image is None:
                            return None

                storage_reference = blob_map[rewritten_image.content_checksum]
                synthesized = synthesize_v1_image(
                    repository_id,
                    storage_reference.id,
                    storage_reference.image_size,
                    rewritten_image.image_id,
                    rewritten_image.created,
                    rewritten_image.comment,
                    rewritten_image.command,
                    rewritten_image.compat_json,
                    parent_image,
                )

                parent_image_map[rewritten_image.image_id] = synthesized
    except ManifestException:
        logger.exception("exception when rewriting v1 metadata")
        return None

    # A manifest that yields no legacy layers has no image to point to; report failure to the
    # caller instead of raising an IndexError on the lookup below.
    if not rewritten_images:
        logger.warning('No legacy layers generated for manifest `%s`',
                       manifest_interface_instance.digest)
        return None

    return rewritten_images[-1].image_id
|