322 lines
14 KiB
Python
322 lines
14 KiB
Python
|
import logging
|
||
|
|
||
|
from collections import namedtuple
|
||
|
|
||
|
from peewee import IntegrityError
|
||
|
|
||
|
from data.database import (Tag, Manifest, ManifestBlob, ManifestLegacyImage, ManifestChild,
|
||
|
db_transaction)
|
||
|
from data.model import BlobDoesNotExist
|
||
|
from data.model.blob import get_or_create_shared_blob, get_shared_blob
|
||
|
from data.model.oci.tag import filter_to_alive_tags, create_temporary_tag_if_necessary
|
||
|
from data.model.oci.label import create_manifest_label
|
||
|
from data.model.oci.retriever import RepositoryContentRetriever
|
||
|
from data.model.storage import lookup_repo_storages_by_content_checksum
|
||
|
from data.model.image import lookup_repository_images, get_image, synthesize_v1_image
|
||
|
from image.docker.schema2 import EMPTY_LAYER_BLOB_DIGEST, EMPTY_LAYER_BYTES
|
||
|
from image.docker.schema1 import ManifestException
|
||
|
from image.docker.schema2.list import MalformedSchema2ManifestList
|
||
|
from util.validation import is_json
|
||
|
|
||
|
|
||
|
# Default value for the `temp_tag_expiration_sec` parameters in this module: the expiration,
# in seconds, passed along when creating the temporary tag that keeps a manifest available.
TEMP_TAG_EXPIRATION_SEC = 300 # 5 minutes
|
||
|
|
||
|
|
||
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
||
|
|
||
|
# Result of a manifest get-or-create operation:
#   manifest        - the Manifest row that was found or created.
#   newly_created   - True only when the row was created by this call; False when an
#                     existing row was returned.
#   labels_to_apply - dict of labels for a newly created manifest; None when an existing
#                     manifest was returned.
CreatedManifest = namedtuple('CreatedManifest', ['manifest', 'newly_created', 'labels_to_apply'])
|
||
|
|
||
|
|
||
|
class CreateManifestException(Exception):
    """ Raised, in place of a `None` result, when manifest creation fails and the caller
        asked for failures to be reported as exceptions.
    """
|
||
|
|
||
|
|
||
|
def lookup_manifest(repository_id, manifest_digest, allow_dead=False, require_available=False,
                    temp_tag_expiration_sec=TEMP_TAG_EXPIRATION_SEC):
    """ Looks up the manifest with the given digest under the given repository, returning
        None when there is no such manifest.

        With allow_dead=True, manifests that are only referenced by dead tags are returned
        as well. With require_available=True, a found manifest is additionally marked with
        a temporary tag (expiring after temp_tag_expiration_sec) so that it stays available.
    """
    if require_available:
        # Look up and tag within a single transaction.
        with db_transaction():
            manifest = _lookup_manifest(repository_id, manifest_digest, allow_dead=allow_dead)
            if manifest is not None:
                create_temporary_tag_if_necessary(manifest, temp_tag_expiration_sec)

            return manifest

    return _lookup_manifest(repository_id, manifest_digest, allow_dead=allow_dead)
|
||
|
|
||
|
|
||
|
def _lookup_manifest(repository_id, manifest_digest, allow_dead=False):
    """ Returns the Manifest row with the given digest in the given repository, or None.

        Unless allow_dead is set, the manifest must be referenced by an alive tag, either
        directly or as the child of another manifest that is referenced by an alive tag.
    """
    def first_or_none(candidates):
        # Run the query, mapping "no matching row" to None.
        try:
            return candidates.get()
        except Manifest.DoesNotExist:
            return None

    by_digest = (Manifest
                 .select()
                 .where(Manifest.repository == repository_id)
                 .where(Manifest.digest == manifest_digest))

    if allow_dead:
        return first_or_none(by_digest)

    # First attempt: the manifest is itself referenced by an alive tag.
    directly_tagged = first_or_none(filter_to_alive_tags(by_digest.join(Tag)))
    if directly_tagged is not None:
        return directly_tagged

    # Second attempt: the manifest is a child of a manifest that has an alive tag.
    via_parent = (by_digest
                  .join(ManifestChild, on=(ManifestChild.child_manifest == Manifest.id))
                  .join(Tag, on=(Tag.manifest == ManifestChild.manifest)))
    return first_or_none(filter_to_alive_tags(via_parent))
|
||
|
|
||
|
|
||
|
def get_or_create_manifest(repository_id, manifest_interface_instance, storage,
                           temp_tag_expiration_sec=TEMP_TAG_EXPIRATION_SEC,
                           for_tagging=False, raise_on_error=False):
    """ Returns a CreatedManifest describing the manifest in the repository whose digest
        matches the given manifest: the existing row if there already is one, otherwise a
        newly created row.

        On a creation error the result is None, unless raise_on_error is set, in which case
        a CreateManifestException carrying more context about the error is raised instead.

        Note that *all* blobs referenced by the manifest must already exist in the
        repository; otherwise creation fails (with a None result).
    """
    digest = manifest_interface_instance.digest
    already_present = lookup_manifest(repository_id, digest, allow_dead=True,
                                      require_available=True,
                                      temp_tag_expiration_sec=temp_tag_expiration_sec)
    if already_present is None:
        return _create_manifest(repository_id, manifest_interface_instance, storage,
                                temp_tag_expiration_sec, for_tagging=for_tagging,
                                raise_on_error=raise_on_error)

    return CreatedManifest(manifest=already_present, newly_created=False, labels_to_apply=None)
|
||
|
|
||
|
|
||
|
def _create_manifest(repository_id, manifest_interface_instance, storage,
                     temp_tag_expiration_sec=TEMP_TAG_EXPIRATION_SEC,
                     for_tagging=False, raise_on_error=False):
    """ Validates the given manifest and creates its rows (manifest, blobs, legacy image,
        child manifests and labels) under the given repository.

        Returns a CreatedManifest. `newly_created` is True when this call created the row;
        if a row with the same digest is found while creating (e.g. it was created
        concurrently), that row is returned with `newly_created=False` and no labels.

        On failure (invalid manifest, unloadable child manifest, missing blob, legacy image
        that cannot be populated) returns None, or raises CreateManifestException when
        raise_on_error is True.
    """
    # Validate the manifest.
    retriever = RepositoryContentRetriever.for_repository(repository_id, storage)
    try:
        manifest_interface_instance.validate(retriever)
    except (ManifestException, MalformedSchema2ManifestList, BlobDoesNotExist, IOError) as ex:
        logger.exception('Could not validate manifest `%s`', manifest_interface_instance.digest)
        if raise_on_error:
            raise CreateManifestException(ex)

        return None

    # Load, parse and get/create the child manifests, if any.
    child_manifest_refs = manifest_interface_instance.child_manifests(retriever)
    child_manifest_rows = {}
    child_manifest_label_dicts = []

    if child_manifest_refs is not None:
        for child_manifest_ref in child_manifest_refs:
            # Load and parse the child manifest.
            try:
                child_manifest = child_manifest_ref.manifest_obj
            except (ManifestException, MalformedSchema2ManifestList, BlobDoesNotExist,
                    IOError) as ex:
                logger.exception('Could not load manifest list for manifest `%s`',
                                 manifest_interface_instance.digest)
                if raise_on_error:
                    raise CreateManifestException(ex)

                return None

            # Retrieve its labels.
            labels = child_manifest.get_manifest_labels(retriever)
            if labels is None:
                # No exception is being handled here, so log with error() rather than
                # exception() (which would append a meaningless empty traceback).
                logger.error('Could not load manifest labels for child manifest')
                if raise_on_error:
                    raise CreateManifestException(
                        'Could not load manifest labels for child manifest')

                return None

            # Get/create the child manifest in the database.
            child_manifest_info = get_or_create_manifest(repository_id, child_manifest, storage,
                                                         raise_on_error=raise_on_error)
            if child_manifest_info is None:
                logger.error('Could not get/create child manifest')
                if raise_on_error:
                    raise CreateManifestException('Could not get/create child manifest')

                return None

            child_manifest_rows[child_manifest_info.manifest.digest] = child_manifest_info.manifest
            child_manifest_label_dicts.append(labels)

    # Ensure all the blobs in the manifest exist.
    digests = set(manifest_interface_instance.local_blob_digests)
    blob_map = {}

    # If the special empty layer is required, simply load it directly. This is much faster
    # than trying to load it on a per repository basis, and that is unnecessary anyway since
    # this layer is predefined.
    if EMPTY_LAYER_BLOB_DIGEST in digests:
        digests.remove(EMPTY_LAYER_BLOB_DIGEST)
        blob_map[EMPTY_LAYER_BLOB_DIGEST] = get_shared_blob(EMPTY_LAYER_BLOB_DIGEST)
        if not blob_map[EMPTY_LAYER_BLOB_DIGEST]:
            logger.warning('Could not find the special empty blob in storage')
            if raise_on_error:
                raise CreateManifestException('Could not find the special empty blob in storage')

            return None

    if digests:
        query = lookup_repo_storages_by_content_checksum(repository_id, digests)
        blob_map.update({s.content_checksum: s for s in query})
        for digest_str in digests:
            if digest_str not in blob_map:
                logger.warning('Unknown blob `%s` under manifest `%s` for repository `%s`',
                               digest_str, manifest_interface_instance.digest, repository_id)

                if raise_on_error:
                    raise CreateManifestException('Unknown blob `%s`' % digest_str)

                return None

    # Special check: If the empty layer blob is needed for this manifest, add it to the
    # blob map. This is necessary because Docker decided to elide sending of this special
    # empty layer in schema version 2, but we need to have it referenced for GC and schema
    # version 1.
    if EMPTY_LAYER_BLOB_DIGEST not in blob_map:
        if manifest_interface_instance.get_requires_empty_layer_blob(retriever):
            shared_blob = get_or_create_shared_blob(EMPTY_LAYER_BLOB_DIGEST, EMPTY_LAYER_BYTES,
                                                    storage)
            assert not shared_blob.uploading
            assert shared_blob.content_checksum == EMPTY_LAYER_BLOB_DIGEST
            blob_map[EMPTY_LAYER_BLOB_DIGEST] = shared_blob

    # Determine and populate the legacy image if necessary. Manifest lists will not have a
    # legacy image.
    legacy_image = None
    if manifest_interface_instance.has_legacy_image:
        legacy_image_id = _populate_legacy_image(repository_id, manifest_interface_instance,
                                                 blob_map, retriever)
        if legacy_image_id is None:
            if raise_on_error:
                raise CreateManifestException('Could not populate legacy image for manifest')

            return None

        legacy_image = get_image(repository_id, legacy_image_id)
        if legacy_image is None:
            if raise_on_error:
                raise CreateManifestException('Could not load legacy image for manifest')

            return None

    # Create the manifest and its blobs.
    media_type = Manifest.media_type.get_id(manifest_interface_instance.media_type)
    storage_ids = {blob.id for blob in blob_map.values()}

    with db_transaction():
        # Check for the manifest. This is necessary because Postgres doesn't handle
        # IntegrityErrors well under transactions.
        try:
            manifest = Manifest.get(repository=repository_id,
                                    digest=manifest_interface_instance.digest)
            return CreatedManifest(manifest=manifest, newly_created=False, labels_to_apply=None)
        except Manifest.DoesNotExist:
            pass

        # Create the manifest.
        try:
            manifest = Manifest.create(
                repository=repository_id,
                digest=manifest_interface_instance.digest,
                media_type=media_type,
                manifest_bytes=manifest_interface_instance.bytes.as_encoded_str())
        except IntegrityError:
            manifest = Manifest.get(repository=repository_id,
                                    digest=manifest_interface_instance.digest)
            return CreatedManifest(manifest=manifest, newly_created=False, labels_to_apply=None)

        # Insert the blobs.
        blobs_to_insert = [dict(manifest=manifest, repository=repository_id, blob=storage_id)
                           for storage_id in storage_ids]
        if blobs_to_insert:
            ManifestBlob.insert_many(blobs_to_insert).execute()

        # Set the legacy image (if applicable).
        if legacy_image is not None:
            ManifestLegacyImage.create(repository=repository_id, image=legacy_image,
                                       manifest=manifest)

        # Insert the manifest child rows (if applicable).
        if child_manifest_rows:
            children_to_insert = [dict(manifest=manifest, child_manifest=child_manifest,
                                       repository=repository_id)
                                  for child_manifest in child_manifest_rows.values()]
            ManifestChild.insert_many(children_to_insert).execute()

        # If this manifest is being created not for immediate tagging, add a temporary tag to
        # the manifest to ensure it isn't being GCed. If the manifest *is* for tagging, then
        # since we're creating a new one here, it cannot be GCed (since it isn't referenced by
        # anything yet), so it's safe to elide the temp tag operation. If we ever change GC
        # code to collect *all* manifests in a repository for GC, then we will have to
        # reevaluate this optimization at that time.
        if not for_tagging:
            create_temporary_tag_if_necessary(manifest, temp_tag_expiration_sec)

    # Define the labels for the manifest (if any).
    labels = manifest_interface_instance.get_manifest_labels(retriever)
    if labels:
        for key, value in labels.items():
            # Use a dedicated name so the manifest's media type id above is not shadowed.
            label_media_type = 'application/json' if is_json(value) else 'text/plain'
            create_manifest_label(manifest, key, value, 'manifest', label_media_type)

    # Return the dictionary of labels to apply (i.e. those labels that cause an action to be
    # taken on the manifest or its resulting tags). We only return those labels either defined
    # on the manifest or shared amongst all the child manifests. We intersect amongst all child
    # manifests to ensure that any action performed is defined in all manifests.
    labels_to_apply = labels or {}
    if child_manifest_label_dicts:
        # Keep only the key+value pairs present, with the exact same value, in every child.
        labels_to_apply = dict(child_manifest_label_dicts[0])
        for child_manifest_label_dict in child_manifest_label_dicts[1:]:
            labels_to_apply = {key: value for key, value in labels_to_apply.items()
                               if key in child_manifest_label_dict
                               and child_manifest_label_dict[key] == value}

    return CreatedManifest(manifest=manifest, newly_created=True, labels_to_apply=labels_to_apply)
|
||
|
|
||
|
|
||
|
def _populate_legacy_image(repository_id, manifest_interface_instance, blob_map, retriever):
    """ Synthesizes the V1 (legacy) images for the manifest's layers that do not yet exist
        in the repository.

        `blob_map` maps content checksums to the storage rows of the manifest's blobs; the
        storage row for each synthesized image is looked up in it.

        Returns the image ID of the last generated legacy layer, or None if a referenced
        parent image cannot be found, the legacy metadata cannot be generated, or the
        manifest produces no legacy layers at all.
    """
    # Lookup all the images and their parent images (if any) inside the manifest.
    # This will let us know which v1 images we need to synthesize and which ones are invalid.
    docker_image_ids = list(manifest_interface_instance.get_legacy_image_ids(retriever))
    images_query = lookup_repository_images(repository_id, docker_image_ids)
    image_storage_map = {i.docker_image_id: i.storage for i in images_query}

    # Rewrite any v1 image IDs that do not match the checksum in the database.
    try:
        rewritten_images = manifest_interface_instance.generate_legacy_layers(image_storage_map,
                                                                              retriever)
        rewritten_images = list(rewritten_images)
        parent_image_map = {}

        for rewritten_image in rewritten_images:
            if rewritten_image.image_id not in image_storage_map:
                parent_image = None
                if rewritten_image.parent_image_id:
                    # Prefer a parent synthesized earlier in this loop; otherwise fall back to
                    # looking it up in the repository.
                    parent_image = parent_image_map.get(rewritten_image.parent_image_id)
                    if parent_image is None:
                        parent_image = get_image(repository_id, rewritten_image.parent_image_id)
                        if parent_image is None:
                            return None

                storage_reference = blob_map[rewritten_image.content_checksum]
                synthesized = synthesize_v1_image(
                    repository_id,
                    storage_reference.id,
                    storage_reference.image_size,
                    rewritten_image.image_id,
                    rewritten_image.created,
                    rewritten_image.comment,
                    rewritten_image.command,
                    rewritten_image.compat_json,
                    parent_image,
                )

                parent_image_map[rewritten_image.image_id] = synthesized
    except ManifestException:
        logger.exception("exception when rewriting v1 metadata")
        return None

    if not rewritten_images:
        # Nothing was generated, so there is no legacy image to point at. Report failure
        # instead of raising IndexError on the lookup below.
        logger.warning('No legacy layers generated for manifest `%s`',
                       manifest_interface_instance.digest)
        return None

    return rewritten_images[-1].image_id
|