Add support for creating schema 2 manifests and manifest lists via the OCI model

This commit is contained in:
Joseph Schorr 2018-11-12 23:27:49 +02:00
parent e344d4a5cf
commit 30f072aeff
16 changed files with 398 additions and 110 deletions

View file

@ -1,15 +1,28 @@
import logging
from collections import namedtuple
from peewee import IntegrityError
from data.database import Tag, Manifest, ManifestBlob, ManifestLegacyImage, db_transaction
from data.database import (Tag, Manifest, ManifestBlob, ManifestLegacyImage, ManifestChild,
db_transaction)
from data.model import BlobDoesNotExist
from data.model.oci.tag import filter_to_alive_tags
from data.model.storage import lookup_repo_storages_by_content_checksum
from data.model.oci.label import create_manifest_label
from data.model.storage import (lookup_repo_storages_by_content_checksum, get_storage_locations,
get_layer_path)
from data.model.blob import get_repository_blob_by_digest
from data.model.image import lookup_repository_images, get_image, synthesize_v1_image
from image.docker.schema1 import DockerSchema1Manifest, ManifestException
from image.docker.schema1 import ManifestException
from image.docker.schema2.list import MalformedSchema2ManifestList
from util.validation import is_json
logger = logging.getLogger(__name__)
CreatedManifest = namedtuple('CreatedManifest', ['manifest', 'newly_created', 'labels_to_apply'])
def lookup_manifest(repository_id, manifest_digest, allow_dead=False):
""" Returns the manifest with the specified digest under the specified repository
or None if none. If allow_dead is True, then manifests referenced by only
@ -29,43 +42,97 @@ def lookup_manifest(repository_id, manifest_digest, allow_dead=False):
return None
def get_or_create_manifest(repository_id, manifest_interface_instance):
""" Returns a tuple of the manifest in the specified repository with the matching digest
(if it already exists) or, if not yet created, creates and returns the manifest, as well as
if the manifest was created. Returns (None, None) if there was an error creating the manifest.
def get_or_create_manifest(repository_id, manifest_interface_instance, storage):
""" Returns a CreatedManifest for the manifest in the specified repository with the matching
digest (if it already exists) or, if not yet created, creates and returns the manifest.
Returns None if there was an error creating the manifest.
Note that *all* blobs referenced by the manifest must exist already in the repository or this
method will fail with a (None, None).
method will fail with a None.
"""
existing = lookup_manifest(repository_id, manifest_interface_instance.digest, allow_dead=True)
if existing is not None:
return existing, False
return CreatedManifest(manifest=existing, newly_created=False, labels_to_apply=None)
assert len(list(manifest_interface_instance.layers)) > 0
return _create_manifest(repository_id, manifest_interface_instance, storage)
# TODO(jschorr): Switch this to supporting schema2 once we're ready.
assert isinstance(manifest_interface_instance, DockerSchema1Manifest)
def _create_manifest(repository_id, manifest_interface_instance, storage):
digests = set(manifest_interface_instance.blob_digests)
def _lookup_digest(digest):
return _retrieve_bytes_in_storage(repository_id, digest, storage)
# Retrieve the child manifests, if any. If we do retrieve a child manifest, we also remove its
# blob from the list of blobs for this manifest, as the blob isn't really a "blob".
child_manifest_refs = manifest_interface_instance.child_manifests(_lookup_digest)
child_manifest_rows = []
child_manifest_label_dicts = []
if child_manifest_refs is not None:
for child_manifest_ref in child_manifest_refs:
# Load and parse the child manifest.
try:
child_manifest = child_manifest_ref.manifest_obj
except ManifestException:
logger.exception('Could not load manifest list for manifest `%s`',
manifest_interface_instance.digest)
return None
except MalformedSchema2ManifestList:
logger.exception('Could not load manifest list for manifest `%s`',
manifest_interface_instance.digest)
return None
except BlobDoesNotExist:
logger.exception('Could not load manifest list for manifest `%s`',
manifest_interface_instance.digest)
return None
except IOError:
logger.exception('Could not load manifest list for manifest `%s`',
manifest_interface_instance.digest)
return None
# Retrieve its labels.
labels = child_manifest.get_manifest_labels(_lookup_digest)
if labels is None:
logger.exception('Could not load manifest labels for child manifest')
return None
# Get/create the child manifest in the database.
assert list(child_manifest.layers)
child_manifest_info = get_or_create_manifest(repository_id, child_manifest, storage)
if child_manifest_info is None:
logger.error('Could not get/create child manifest')
return None
child_manifest_rows.append(child_manifest_info.manifest)
child_manifest_label_dicts.append(labels)
digests.remove(child_manifest.digest)
# Ensure all the blobs in the manifest exist.
digests = manifest_interface_instance.checksums
query = lookup_repo_storages_by_content_checksum(repository_id, digests)
blob_map = {s.content_checksum: s for s in query}
for digest_str in manifest_interface_instance.blob_digests:
if digest_str not in blob_map:
logger.warning('Unknown blob `%s` under manifest `%s` for repository `%s`', digest_str,
manifest_interface_instance.digest, repository_id)
return None, None
blob_map = {}
if digests:
query = lookup_repo_storages_by_content_checksum(repository_id, digests)
blob_map = {s.content_checksum: s for s in query}
for digest_str in digests:
if digest_str not in blob_map:
logger.warning('Unknown blob `%s` under manifest `%s` for repository `%s`', digest_str,
manifest_interface_instance.digest, repository_id)
return None
# Determine and populate the legacy image if necessary.
legacy_image_id = _populate_legacy_image(repository_id, manifest_interface_instance, blob_map)
if legacy_image_id is None:
return None, None
# Determine and populate the legacy image if necessary. Manifest lists will not have a legacy
# image.
legacy_image = None
if manifest_interface_instance.leaf_layer_v1_image_id is not None:
legacy_image_id = _populate_legacy_image(repository_id, manifest_interface_instance, blob_map,
storage)
if legacy_image_id is None:
return None
legacy_image = get_image(repository_id, legacy_image_id)
if legacy_image is None:
return None, None
legacy_image = get_image(repository_id, legacy_image_id)
if legacy_image is None:
return None
# Create the manifest and its blobs.
media_type = Manifest.media_type.get_id(manifest_interface_instance.content_type)
media_type = Manifest.media_type.get_id(manifest_interface_instance.media_type)
storage_ids = {storage.id for storage in blob_map.values()}
with db_transaction():
@ -77,7 +144,7 @@ def get_or_create_manifest(repository_id, manifest_interface_instance):
manifest_bytes=manifest_interface_instance.bytes)
except IntegrityError:
manifest = Manifest.get(repository=repository_id, digest=manifest_interface_instance.digest)
return manifest, False
return CreatedManifest(manifest=manifest, newly_created=False, labels_to_apply=None)
# Insert the blobs.
blobs_to_insert = [dict(manifest=manifest, repository=repository_id,
@ -86,12 +153,42 @@ def get_or_create_manifest(repository_id, manifest_interface_instance):
ManifestBlob.insert_many(blobs_to_insert).execute()
# Set the legacy image (if applicable).
ManifestLegacyImage.create(repository=repository_id, image=legacy_image, manifest=manifest)
if legacy_image is not None:
ManifestLegacyImage.create(repository=repository_id, image=legacy_image, manifest=manifest)
return manifest, True
# Insert the manifest child rows (if applicable).
if child_manifest_rows:
children_to_insert = [dict(manifest=manifest, child_manifest=child_manifest,
repository=repository_id)
for child_manifest in child_manifest_rows]
ManifestChild.insert_many(children_to_insert).execute()
# Define the labels for the manifest (if any).
labels = manifest_interface_instance.get_manifest_labels(_lookup_digest)
if labels:
for key, value in labels.iteritems():
media_type = 'application/json' if is_json(value) else 'text/plain'
create_manifest_label(manifest, key, value, 'manifest', media_type)
# Return the dictionary of labels to apply. We only return those labels either defined on
# the manifest or shared amongst all the child manifest.
labels_to_apply = labels or {}
if child_manifest_label_dicts:
labels_to_apply = child_manifest_label_dicts[0].viewitems()
for child_manifest_label_dict in child_manifest_label_dicts[1:]:
# Intersect the key+values of the labels to ensure we get the exact same result
# for all the child manifests.
labels_to_apply = labels_to_apply & child_manifest_label_dict.viewitems()
labels_to_apply = dict(labels_to_apply)
return CreatedManifest(manifest=manifest, newly_created=True, labels_to_apply=labels_to_apply)
def _populate_legacy_image(repository_id, manifest_interface_instance, blob_map):
def _populate_legacy_image(repository_id, manifest_interface_instance, blob_map, storage):
def _lookup_digest(digest):
return _retrieve_bytes_in_storage(repository_id, digest, storage)
# Lookup all the images and their parent images (if any) inside the manifest.
# This will let us know which v1 images we need to synthesize and which ones are invalid.
docker_image_ids = list(manifest_interface_instance.legacy_image_ids)
@ -100,7 +197,8 @@ def _populate_legacy_image(repository_id, manifest_interface_instance, blob_map)
# Rewrite any v1 image IDs that do not match the checksum in the database.
try:
rewritten_images = manifest_interface_instance.rewrite_invalid_image_ids(image_storage_map)
rewritten_images = manifest_interface_instance.generate_legacy_layers(image_storage_map,
_lookup_digest)
rewritten_images = list(rewritten_images)
parent_image_map = {}
@ -132,3 +230,12 @@ def _populate_legacy_image(repository_id, manifest_interface_instance, blob_map)
return None
return rewritten_images[-1].image_id
def _retrieve_bytes_in_storage(repository_id, digest, storage):
  """ Looks up the blob with the given digest under the given repository and returns whatever
      `storage.get_content` yields for that blob's storage locations and layer path.
      Returns None if the blob lookup itself returns None.
  """
  blob_record = get_repository_blob_by_digest(repository_id, digest)
  if blob_record is not None:
    # Materialize the locations before handing them to the storage engine.
    locations = list(get_storage_locations(blob_record.uuid))
    return storage.get_content(locations, get_layer_path(blob_record))

  return None

View file

@ -1,13 +1,22 @@
import json
from playhouse.test_utils import assert_query_count
from app import docker_v2_signing_key
from app import docker_v2_signing_key, storage
from data.database import Tag, ManifestBlob, get_epoch_timestamp_ms
from digest.digest_tools import sha256_digest
from data.database import Tag, ManifestBlob, ImageStorageLocation, ManifestChild, get_epoch_timestamp_ms
from data.model.oci.manifest import lookup_manifest, get_or_create_manifest
from data.model.oci.tag import filter_to_alive_tags, get_tag
from data.model.oci.shared import get_legacy_image_for_manifest
from data.model.repository import get_repository
from data.model.oci.label import list_manifest_labels
from data.model.repository import get_repository, create_repository
from data.model.image import find_create_or_link_image
from data.model.blob import store_blob_record_and_temp_link
from data.model.storage import get_layer_path
from image.docker.schema1 import DockerSchema1ManifestBuilder, DockerSchema1Manifest
from image.docker.schema2.manifest import DockerSchema2ManifestBuilder
from image.docker.schema2.list import DockerSchema2ManifestListBuilder
from test.fixtures import *
@ -38,35 +47,104 @@ def test_lookup_manifest_dead_tag(initialized_db):
dead_tag.manifest)
def test_get_or_create_manifest(initialized_db):
repository = get_repository('devtable', 'simple')
def _populate_blob(content):
  """ Test helper: records a blob (with a temporary link) for `content` under the
      `devtable/newrepo` repository, writes the content into the `local_us` storage location and
      returns a `(blob, digest)` tuple, where digest is the stringified SHA256 digest of `content`.
  """
  content_digest = str(sha256_digest(content))
  storage_location = ImageStorageLocation.get(name='local_us')

  # Create the database record first; its layer path decides where the bytes are written.
  blob_record = store_blob_record_and_temp_link('devtable', 'newrepo', content_digest,
                                                storage_location, len(content), 120)
  storage.put_content(['local_us'], get_layer_path(blob_record), content)
  return blob_record, content_digest
latest_tag = get_tag(repository, 'latest')
legacy_image = get_legacy_image_for_manifest(latest_tag.manifest)
parsed = DockerSchema1Manifest(latest_tag.manifest.manifest_bytes, validate=False)
builder = DockerSchema1ManifestBuilder('devtable', 'simple', 'anothertag')
builder.add_layer(parsed.blob_digests[0], '{"id": "%s"}' % legacy_image.docker_image_id)
sample_manifest_instance = builder.build(docker_v2_signing_key)
@pytest.mark.parametrize('schema_version', [
1,
2,
])
def test_get_or_create_manifest(schema_version, initialized_db):
repository = create_repository('devtable', 'newrepo', None)
expected_labels = {
'Foo': 'Bar',
'Baz': 'Meh',
}
layer_json = json.dumps({
'id': 'somelegacyid',
'config': {
'Labels': expected_labels,
},
"rootfs": {
"type": "layers",
"diff_ids": []
},
"history": [
{
"created": "2018-04-03T18:37:09.284840891Z",
"created_by": "do something",
},
],
})
# Create a legacy image.
find_create_or_link_image('somelegacyid', repository, 'devtable', {}, 'local_us')
# Add a blob containing the config.
_, config_digest = _populate_blob(layer_json)
# Add a blob of random data.
random_data = 'hello world'
_, random_digest = _populate_blob(random_data)
# Build the manifest.
if schema_version == 1:
builder = DockerSchema1ManifestBuilder('devtable', 'simple', 'anothertag')
builder.add_layer(random_digest, layer_json)
sample_manifest_instance = builder.build(docker_v2_signing_key)
elif schema_version == 2:
builder = DockerSchema2ManifestBuilder()
builder.set_config_digest(config_digest, len(layer_json))
builder.add_layer(random_digest, len(random_data))
sample_manifest_instance = builder.build()
# Create a new manifest.
created, newly_created = get_or_create_manifest(repository, sample_manifest_instance)
created_manifest = get_or_create_manifest(repository, sample_manifest_instance, storage)
created = created_manifest.manifest
newly_created = created_manifest.newly_created
assert newly_created
assert created is not None
assert created.media_type.name == sample_manifest_instance.media_type
assert created.digest == sample_manifest_instance.digest
assert created.manifest_bytes == sample_manifest_instance.bytes
assert created_manifest.labels_to_apply == expected_labels
assert get_legacy_image_for_manifest(created) is not None
# Verify the legacy image.
legacy_image = get_legacy_image_for_manifest(created)
assert legacy_image is not None
assert legacy_image.storage.content_checksum == random_digest
# Verify the linked blobs.
blob_digests = [mb.blob.content_checksum for mb
in ManifestBlob.select().where(ManifestBlob.manifest == created)]
assert parsed.blob_digests[0] in blob_digests
assert random_digest in blob_digests
if schema_version == 2:
assert config_digest in blob_digests
# Retrieve it again and ensure it is the same manifest.
created2, newly_created2 = get_or_create_manifest(repository, sample_manifest_instance)
created_manifest2 = get_or_create_manifest(repository, sample_manifest_instance, storage)
created2 = created_manifest2.manifest
newly_created2 = created_manifest2.newly_created
assert not newly_created2
assert created2 == created
# Ensure the labels were added.
labels = list(list_manifest_labels(created))
assert len(labels) == 2
labels_dict = {label.key: label.value for label in labels}
assert labels_dict == expected_labels
def test_get_or_create_manifest_invalid_image(initialized_db):
repository = get_repository('devtable', 'simple')
@ -78,6 +156,86 @@ def test_get_or_create_manifest_invalid_image(initialized_db):
builder.add_layer(parsed.blob_digests[0], '{"id": "foo", "parent": "someinvalidimageid"}')
sample_manifest_instance = builder.build(docker_v2_signing_key)
created, newly_created = get_or_create_manifest(repository, sample_manifest_instance)
assert created is None
assert newly_created is None
created_manifest = get_or_create_manifest(repository, sample_manifest_instance, storage)
assert created_manifest is None
def test_get_or_create_manifest_list(initialized_db):
  """ Builds a schema 1 and a schema 2 manifest, stores both as blobs, wraps them in a manifest
      list and verifies that creating the list yields a manifest row with the list's media type
      and digest, plus a ManifestChild link to each of the two child manifests.
  """
  repository = create_repository('devtable', 'newrepo', None)

  expected_labels = {
    'Foo': 'Bar',
    'Baz': 'Meh',
  }

  layer_json = json.dumps({
    'id': 'somelegacyid',
    'config': {
      'Labels': expected_labels,
    },
    "rootfs": {
      "type": "layers",
      "diff_ids": []
    },
    "history": [
      {
        "created": "2018-04-03T18:37:09.284840891Z",
        "created_by": "do something",
      },
    ],
  })

  # A legacy image must exist for the ID referenced by the layer JSON.
  find_create_or_link_image('somelegacyid', repository, 'devtable', {}, 'local_us')

  # Store the config blob, followed by a blob of arbitrary layer data.
  _, config_digest = _populate_blob(layer_json)

  random_data = 'hello world'
  _, random_digest = _populate_blob(random_data)

  # Build the schema 1 child manifest (unsigned form).
  v1_builder = DockerSchema1ManifestBuilder('devtable', 'simple', 'anothertag')
  v1_builder.add_layer(random_digest, layer_json)
  v1_manifest = v1_builder.build(docker_v2_signing_key).unsigned()

  # Build the schema 2 child manifest.
  v2_builder = DockerSchema2ManifestBuilder()
  v2_builder.set_config_digest(config_digest, len(layer_json))
  v2_builder.add_layer(random_digest, len(random_data))
  v2_manifest = v2_builder.build()

  children = (v1_manifest, v2_manifest)

  # Write each child manifest as a blob, so the manifest list can load it by digest.
  location = ImageStorageLocation.get(name='local_us')
  for child in children:
    blob = store_blob_record_and_temp_link('devtable', 'newrepo', child.digest, location,
                                           len(child.bytes), 120)
    storage.put_content(['local_us'], get_layer_path(blob), child.bytes)

  # Build the manifest list referencing both children.
  list_builder = DockerSchema2ManifestListBuilder()
  list_builder.add_manifest(v1_manifest, 'amd64', 'linux')
  list_builder.add_manifest(v2_manifest, 'amd32', 'linux')
  manifest_list = list_builder.build()

  # Write the manifest list, which should also write the manifests themselves.
  result = get_or_create_manifest(repository, manifest_list, storage)
  assert result is not None

  created_list = result.manifest
  assert created_list
  assert created_list.media_type.name == manifest_list.media_type
  assert created_list.digest == manifest_list.digest

  # Collect the child links recorded for the list, keyed by child digest.
  linked = {}
  for link in ManifestChild.select().where(ManifestChild.manifest == created_list):
    linked[link.child_manifest.digest] = link.child_manifest

  assert len(linked) == 2
  for child in children:
    assert child.digest in linked

  for child in children:
    assert linked[child.digest].media_type.name == child.media_type