This repository has been archived on 2020-03-24. You can view files and clone it, but cannot push or open issues or pull requests.
quay/data/model/oci/manifest.py

242 lines
10 KiB
Python
Raw Normal View History

import logging
from collections import namedtuple
from peewee import IntegrityError
from data.database import (Tag, Manifest, ManifestBlob, ManifestLegacyImage, ManifestChild,
db_transaction)
from data.model import BlobDoesNotExist
from data.model.oci.tag import filter_to_alive_tags
from data.model.oci.label import create_manifest_label
from data.model.storage import (lookup_repo_storages_by_content_checksum, get_storage_locations,
get_layer_path)
from data.model.blob import get_repository_blob_by_digest
from data.model.image import lookup_repository_images, get_image, synthesize_v1_image
from image.docker.schema1 import ManifestException
from image.docker.schema2.list import MalformedSchema2ManifestList
from util.validation import is_json
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Result of a manifest get-or-create operation:
#   manifest        - the manifest row that was found or created.
#   newly_created   - True when the row was created by the call, False when it already existed.
#   labels_to_apply - dict of labels to apply for a newly created manifest; None when the
#                     manifest already existed.
CreatedManifest = namedtuple(
    'CreatedManifest',
    ('manifest', 'newly_created', 'labels_to_apply'),
)
def lookup_manifest(repository_id, manifest_digest, allow_dead=False):
    """ Looks up the manifest with the given digest under the given repository.

        Returns the matching Manifest row, or None when no row matches. Unless allow_dead is
        True, the query is additionally joined against Tag and restricted by
        filter_to_alive_tags, so manifests referenced only by dead tags are not returned.
    """
    in_repository = Manifest.repository == repository_id
    has_digest = Manifest.digest == manifest_digest
    manifest_query = Manifest.select().where(in_repository).where(has_digest)

    if not allow_dead:
        # Group by manifest ID so that a manifest joined to several tags yields a single row.
        tagged_query = manifest_query.join(Tag)
        manifest_query = filter_to_alive_tags(tagged_query).group_by(Manifest.id)

    try:
        found = manifest_query.get()
    except Manifest.DoesNotExist:
        found = None

    return found
def get_or_create_manifest(repository_id, manifest_interface_instance, storage):
    """ Finds or creates the manifest with the digest of manifest_interface_instance in the
        specified repository, returning a CreatedManifest describing the outcome.

        An already-present manifest (including one referenced only by dead tags) is returned
        with newly_created=False and labels_to_apply=None. Otherwise creation is delegated to
        _create_manifest, whose result is returned as-is; that result is None if the manifest
        could not be created.

        Note that *all* blobs referenced by the manifest must already exist in the repository,
        or creation fails and None is returned.
    """
    wanted_digest = manifest_interface_instance.digest
    found = lookup_manifest(repository_id, wanted_digest, allow_dead=True)
    if found is None:
        return _create_manifest(repository_id, manifest_interface_instance, storage)

    return CreatedManifest(manifest=found, newly_created=False, labels_to_apply=None)
def _create_manifest(repository_id, manifest_interface_instance, storage):
    """ Creates the database rows for the given manifest under the specified repository.

        The steps are:
          1. Load every child manifest (if any), get/create its row, and drop its digest from
             the set of blob digests that must exist in the repository.
          2. Verify that every remaining blob digest is present in the repository.
          3. Populate the legacy (V1) image when the manifest declares a leaf layer image ID.
          4. Inside a transaction, insert the Manifest row and its ManifestBlob,
             ManifestLegacyImage and ManifestChild rows.
          5. Create the labels defined on the manifest itself.

        Returns a CreatedManifest, or None if any step fails (unloadable child manifest,
        missing labels on a child, unknown blob, legacy image that could not be populated).
        If another writer created the manifest first, the existing row is returned with
        newly_created=False and labels_to_apply=None.

        Raises AssertionError if a child manifest has no layers.
    """
    digests = set(manifest_interface_instance.blob_digests)

    def _lookup_digest(digest):
        return _retrieve_bytes_in_storage(repository_id, digest, storage)

    # Retrieve the child manifests, if any. If we do retrieve a child manifest, we also remove
    # its blob from the list of blobs for this manifest, as the blob isn't really a "blob".
    child_manifest_refs = manifest_interface_instance.child_manifests(_lookup_digest)
    child_manifest_rows = []
    child_manifest_label_dicts = []
    seen_child_digests = set()

    if child_manifest_refs is not None:
        for child_manifest_ref in child_manifest_refs:
            # Load and parse the child manifest.
            try:
                child_manifest = child_manifest_ref.manifest_obj
            except (ManifestException, MalformedSchema2ManifestList, BlobDoesNotExist, IOError):
                logger.exception('Could not load manifest list for manifest `%s`',
                                 manifest_interface_instance.digest)
                return None

            # The same child digest may be referenced more than once. Process it only once so
            # that a single ManifestChild row is inserted for it; since the shared labels are
            # computed by intersection, skipping the repeat cannot change them.
            if child_manifest.digest in seen_child_digests:
                continue
            seen_child_digests.add(child_manifest.digest)

            # Retrieve its labels.
            labels = child_manifest.get_manifest_labels(_lookup_digest)
            if labels is None:
                # No exception is being handled here, so log with error() and not exception().
                logger.error('Could not load manifest labels for child manifest')
                return None

            # Get/create the child manifest in the database.
            # NOTE(review): this assert is skipped when Python runs with -O.
            assert list(child_manifest.layers)
            child_manifest_info = get_or_create_manifest(repository_id, child_manifest, storage)
            if child_manifest_info is None:
                logger.error('Could not get/create child manifest')
                return None

            child_manifest_rows.append(child_manifest_info.manifest)
            child_manifest_label_dicts.append(labels)

            # discard() rather than remove(): the digest may legitimately be absent from the
            # blob digest set, and that must not raise KeyError.
            digests.discard(child_manifest.digest)

    # Ensure all the blobs in the manifest exist.
    blob_map = {}
    if digests:
        query = lookup_repo_storages_by_content_checksum(repository_id, digests)
        blob_map = {s.content_checksum: s for s in query}
        for digest_str in digests:
            if digest_str not in blob_map:
                logger.warning('Unknown blob `%s` under manifest `%s` for repository `%s`',
                               digest_str, manifest_interface_instance.digest, repository_id)
                return None

    # Determine and populate the legacy image if necessary. Manifest lists will not have a
    # legacy image.
    legacy_image = None
    if manifest_interface_instance.leaf_layer_v1_image_id is not None:
        legacy_image_id = _populate_legacy_image(repository_id, manifest_interface_instance,
                                                 blob_map, storage)
        if legacy_image_id is None:
            return None

        legacy_image = get_image(repository_id, legacy_image_id)
        if legacy_image is None:
            return None

    # Create the manifest and its blobs.
    media_type = Manifest.media_type.get_id(manifest_interface_instance.media_type)
    storage_ids = {blob.id for blob in blob_map.values()}

    with db_transaction():
        # Re-check for a concurrently created manifest before inserting.
        # NOTE(review): on backends that abort the surrounding transaction after a constraint
        # violation (e.g. PostgreSQL), the lookup in the IntegrityError handler below cannot
        # succeed, so catching the common case up front avoids relying on it.
        existing = lookup_manifest(repository_id, manifest_interface_instance.digest,
                                   allow_dead=True)
        if existing is not None:
            return CreatedManifest(manifest=existing, newly_created=False, labels_to_apply=None)

        # Create the manifest.
        try:
            manifest = Manifest.create(repository=repository_id,
                                       digest=manifest_interface_instance.digest,
                                       media_type=media_type,
                                       manifest_bytes=manifest_interface_instance.bytes)
        except IntegrityError:
            # Most likely another writer inserted the same manifest; hand back that row.
            existing = lookup_manifest(repository_id, manifest_interface_instance.digest,
                                       allow_dead=True)
            if existing is None:
                logger.error('Got integrity error when creating manifest `%s`',
                             manifest_interface_instance.digest)
                return None

            return CreatedManifest(manifest=existing, newly_created=False, labels_to_apply=None)

        # Insert the blobs.
        blobs_to_insert = [dict(manifest=manifest, repository=repository_id, blob=storage_id)
                           for storage_id in storage_ids]
        if blobs_to_insert:
            ManifestBlob.insert_many(blobs_to_insert).execute()

        # Set the legacy image (if applicable).
        if legacy_image is not None:
            ManifestLegacyImage.create(repository=repository_id, image=legacy_image,
                                       manifest=manifest)

        # Insert the manifest child rows (if applicable).
        if child_manifest_rows:
            children_to_insert = [dict(manifest=manifest, child_manifest=child_manifest_row,
                                       repository=repository_id)
                                  for child_manifest_row in child_manifest_rows]
            ManifestChild.insert_many(children_to_insert).execute()

    # Define the labels for the manifest (if any). A label whose value parses as JSON is
    # stored with the JSON media type; anything else is stored as plain text.
    labels = manifest_interface_instance.get_manifest_labels(_lookup_digest)
    if labels:
        for key, value in labels.items():
            label_media_type = 'application/json' if is_json(value) else 'text/plain'
            create_manifest_label(manifest, key, value, 'manifest', label_media_type)

    # Return the dictionary of labels to apply. When there are child manifests, only the
    # key+value pairs shared by *all* of them are returned (and they replace, rather than
    # merge with, the labels defined on the manifest itself).
    labels_to_apply = labels or {}
    if child_manifest_label_dicts:
        labels_to_apply = dict(child_manifest_label_dicts[0])
        for child_manifest_label_dict in child_manifest_label_dicts[1:]:
            # Keep only the pairs whose key AND value match in this child as well.
            labels_to_apply = {key: value for key, value in labels_to_apply.items()
                               if key in child_manifest_label_dict and
                               child_manifest_label_dict[key] == value}

    return CreatedManifest(manifest=manifest, newly_created=True, labels_to_apply=labels_to_apply)
def _populate_legacy_image(repository_id, manifest_interface_instance, blob_map, storage):
    """ Ensures a legacy (V1) image exists for each layer of the manifest, synthesizing any
        that are not already present in the repository.

        blob_map maps content checksums to storage rows; the `id` and `image_size` of the
        matching row are used for each synthesized image.

        Returns the image ID of the last generated legacy layer, or None if the layers could
        not be generated (ManifestException), a required parent image could not be found, or
        no legacy layers were generated at all.
    """
    def _lookup_digest(digest):
        return _retrieve_bytes_in_storage(repository_id, digest, storage)

    # Lookup all the images and their parent images (if any) inside the manifest.
    # This will let us know which v1 images we need to synthesize and which ones are invalid.
    docker_image_ids = list(manifest_interface_instance.legacy_image_ids)
    images_query = lookup_repository_images(repository_id, docker_image_ids)
    image_storage_map = {i.docker_image_id: i.storage for i in images_query}

    # Rewrite any v1 image IDs that do not match the checksum in the database.
    try:
        rewritten_images = list(
            manifest_interface_instance.generate_legacy_layers(image_storage_map, _lookup_digest))

        # Images synthesized in this call, keyed by image ID, so that later layers can use
        # them as parents without another database lookup.
        parent_image_map = {}

        for rewritten_image in rewritten_images:
            if rewritten_image.image_id in image_storage_map:
                # The image already exists in the repository; nothing to synthesize.
                continue

            parent_image = None
            if rewritten_image.parent_image_id:
                parent_image = parent_image_map.get(rewritten_image.parent_image_id)
                if parent_image is None:
                    parent_image = get_image(repository_id, rewritten_image.parent_image_id)
                    if parent_image is None:
                        return None

            layer_storage = blob_map[rewritten_image.content_checksum]
            synthesized = synthesize_v1_image(
                repository_id,
                layer_storage.id,
                layer_storage.image_size,
                rewritten_image.image_id,
                rewritten_image.created,
                rewritten_image.comment,
                rewritten_image.command,
                rewritten_image.compat_json,
                parent_image,
            )
            parent_image_map[rewritten_image.image_id] = synthesized
    except ManifestException:
        logger.exception("exception when rewriting v1 metadata")
        return None

    # Without any generated layer there is no leaf image to report; indexing [-1] on the empty
    # list would raise IndexError, so signal failure the same way as the other error paths.
    if not rewritten_images:
        logger.warning('No legacy layers generated for manifest `%s`',
                       manifest_interface_instance.digest)
        return None

    return rewritten_images[-1].image_id
def _retrieve_bytes_in_storage(repository_id, digest, storage):
    """ Fetches from storage the content of the repository blob with the given digest.

        Returns whatever storage.get_content returns for the blob's storage locations and
        layer path, or None if get_repository_blob_by_digest yields no blob for the digest.
    """
    blob_row = get_repository_blob_by_digest(repository_id, digest)
    if blob_row is not None:
        locations = list(get_storage_locations(blob_row.uuid))
        return storage.get_content(locations, get_layer_path(blob_row))

    return None