Properly handle the empty layer when pushing schema 2 manifests

Docker doesn't send us the contents of this layer, so we are forced to synthesize it ourselves
This commit is contained in:
Joseph Schorr 2018-11-25 16:16:59 +02:00
parent 947c029afa
commit 4985040d31
13 changed files with 173 additions and 25 deletions

View file

@ -1,3 +1,5 @@
import logging
from datetime import datetime
from uuid import uuid4
@ -7,6 +9,9 @@ from data.database import (Repository, Namespace, ImageStorage, Image, ImageStor
BlobUpload, ImageStorageLocation, db_random_func)
logger = logging.getLogger(__name__)
def get_repository_blob_by_digest(repository, blob_digest):
""" Find the content-addressable blob linked to the specified repository.
"""
@ -157,3 +162,31 @@ def initiate_upload(namespace, repo_name, uuid, location_name, storage_metadata)
location = storage_model.get_image_location_for_name(location_name)
return BlobUpload.create(repository=repo, location=location.id, uuid=uuid,
storage_metadata=storage_metadata)
def get_or_create_shared_blob(digest, byte_data, storage):
""" Returns the ImageStorage blob with the given digest or, if not present,
adds a row and writes the given byte data to the storage engine.
This method is *only* to be used for shared blobs that are globally
accessible, such as the special empty gzipped tar layer that Docker
no longer pushes to us.
"""
try:
return ImageStorage.get(content_checksum=digest, uploading=False)
except ImageStorage.DoesNotExist:
record = ImageStorage.create(image_size=len(byte_data), content_checksum=digest,
cas_path=True, uploading=True)
preferred = storage.preferred_locations[0]
location_obj = ImageStorageLocation.get(name=preferred)
try:
storage.put_content([preferred], storage_model.get_layer_path(record), byte_data)
ImageStoragePlacement.create(storage=record, location=location_obj)
record.uploading = False
record.save()
except:
logger.exception('Exception when trying to write special layer %s', digest)
record.delete_instance()
raise
return record

View file

@ -7,11 +7,13 @@ from peewee import IntegrityError, JOIN
from data.database import (Tag, Manifest, ManifestBlob, ManifestLegacyImage, ManifestChild,
db_transaction)
from data.model import BlobDoesNotExist
from data.model.blob import get_or_create_shared_blob
from data.model.oci.tag import filter_to_alive_tags
from data.model.oci.label import create_manifest_label
from data.model.oci.retriever import RepositoryContentRetriever
from data.model.storage import lookup_repo_storages_by_content_checksum
from data.model.image import lookup_repository_images, get_image, synthesize_v1_image
from image.docker.schema2 import EMPTY_LAYER_BLOB_DIGEST, EMPTY_LAYER_BYTES
from image.docker.schema1 import ManifestException
from image.docker.schema2.list import MalformedSchema2ManifestList
from util.validation import is_json
@ -121,6 +123,15 @@ def _create_manifest(repository_id, manifest_interface_instance, storage):
manifest_interface_instance.digest, repository_id)
return None
# Special check: If the empty layer blob is needed for this manifest, add it to the
# blob map. This is necessary because Docker decided to elide sending of this special
# empty layer in schema version 2, but we need to have it referenced for GC and schema version 1.
if manifest_interface_instance.get_requires_empty_layer_blob(retriever):
shared_blob = get_or_create_shared_blob(EMPTY_LAYER_BLOB_DIGEST, EMPTY_LAYER_BYTES, storage)
assert not shared_blob.uploading
assert shared_blob.content_checksum == EMPTY_LAYER_BLOB_DIGEST
blob_map[EMPTY_LAYER_BLOB_DIGEST] = shared_blob
# Determine and populate the legacy image if necessary. Manifest lists will not have a legacy
# image.
legacy_image = None
@ -214,10 +225,11 @@ def _populate_legacy_image(repository_id, manifest_interface_instance, blob_map,
if parent_image is None:
return None
storage_reference = blob_map[rewritten_image.content_checksum]
synthesized = synthesize_v1_image(
repository_id,
blob_map[rewritten_image.content_checksum].id,
blob_map[rewritten_image.content_checksum].image_size,
storage_reference.id,
storage_reference.image_size,
rewritten_image.image_id,
rewritten_image.created,
rewritten_image.comment,

View file

@ -82,6 +82,11 @@ class ManifestInterface(object):
of manifest does not support labels. """
pass
@abstractmethod
def get_requires_empty_layer_blob(self, content_retriever):
""" Whether this schema requires the special empty layer blob. """
pass
@abstractmethod
def unsigned(self):
""" Returns an unsigned version of this manifest. """

View file

@ -312,6 +312,9 @@ class DockerSchema1Manifest(ManifestInterface):
def get_manifest_labels(self, content_retriever):
return self.layers[-1].v1_metadata.labels
def get_requires_empty_layer_blob(self, content_retriever):
return False
def unsigned(self):
if self.media_type == DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE:
return self

View file

@ -19,3 +19,12 @@ OCI_MANIFESTLIST_CONTENT_TYPE = 'application/vnd.oci.image.index.v1+json'
DOCKER_SCHEMA2_CONTENT_TYPES = {DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE,
DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE}
OCI_CONTENT_TYPES = {OCI_MANIFEST_CONTENT_TYPE, OCI_MANIFESTLIST_CONTENT_TYPE}
# The magical digest to be used for "empty" layers.
# https://github.com/docker/distribution/blob/749f6afb4572201e3c37325d0ffedb6f32be8950/manifest/schema1/config_builder.go#L22
EMPTY_LAYER_BLOB_DIGEST = 'sha256:a3ed95caeb02ffe68cdd9fd84406680ae93d633cb16422d00e8a7c22955b46d4'
EMPTY_LAYER_SIZE = 32
EMPTY_LAYER_BYTES = "".join(map(chr, [
31, 139, 8, 0, 0, 9, 110, 136, 0, 255, 98, 24, 5, 163, 96, 20, 140, 88,
0, 8, 0, 0, 255, 255, 46, 175, 181, 239, 0, 4, 0, 0,
]))

View file

@ -205,6 +205,15 @@ class DockerSchema2Config(object):
""" Returns a dictionary of all the labels defined in this configuration. """
return self._parsed.get('config', {}).get('Labels', {}) or {}
@property
def has_empty_layer(self):
""" Returns whether this config contains an empty layer. """
for history_entry in self._parsed[DOCKER_SCHEMA2_CONFIG_HISTORY_KEY]:
if history_entry.get(DOCKER_SCHEMA2_CONFIG_EMPTY_LAYER_KEY, False):
return True
return False
@property
def history(self):
""" Returns the history of the image, started at the base layer. """

View file

@ -255,6 +255,9 @@ class DockerSchema2ManifestList(ManifestInterface):
def has_legacy_image(self):
return False
def get_requires_empty_layer_blob(self, content_retriever):
return False
def get_schema1_manifest(self, namespace_name, repo_name, tag_name, content_retriever):
""" Returns the manifest that is compatible with V1, by virtue of being `amd64` and `linux`.
If none, returns None.

View file

@ -11,7 +11,8 @@ from image.docker.interfaces import ManifestInterface
from image.docker.schema2 import (DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE,
DOCKER_SCHEMA2_CONFIG_CONTENT_TYPE,
DOCKER_SCHEMA2_LAYER_CONTENT_TYPE,
DOCKER_SCHEMA2_REMOTE_LAYER_CONTENT_TYPE)
DOCKER_SCHEMA2_REMOTE_LAYER_CONTENT_TYPE,
EMPTY_LAYER_BLOB_DIGEST, EMPTY_LAYER_SIZE)
from image.docker.schema1 import DockerSchema1ManifestBuilder
from image.docker.schema2.config import DockerSchema2Config
@ -34,8 +35,6 @@ ManifestImageLayer = namedtuple('ManifestImageLayer', ['history', 'blob_layer',
'v1_parent_id', 'compressed_size',
'blob_digest'])
EMPTY_BLOB_DIGEST = 'sha256:a3ed95caeb02ffe68cdd9fd84406680ae93d633cb16422d00e8a7c22955b46d4'
logger = logging.getLogger(__name__)
class MalformedSchema2Manifest(ManifestException):
@ -233,8 +232,8 @@ class DockerSchema2Manifest(ManifestInterface):
v1_layer_parent_id = v1_layer_id
blob_layer = None if history_entry.is_empty else self.layers[blob_index]
blob_digest = EMPTY_BLOB_DIGEST if blob_layer is None else str(blob_layer.digest)
compressed_size = 0 if blob_layer is None else blob_layer.compressed_size
blob_digest = EMPTY_LAYER_BLOB_DIGEST if blob_layer is None else str(blob_layer.digest)
compressed_size = EMPTY_LAYER_SIZE if blob_layer is None else blob_layer.compressed_size
# Create a new synthesized V1 ID for the history layer by hashing its content and
# the blob associated withn it.
@ -295,6 +294,13 @@ class DockerSchema2Manifest(ManifestInterface):
def unsigned(self):
return self
def get_requires_empty_layer_blob(self, content_retriever):
schema2_config = self._get_built_config(content_retriever)
if schema2_config is None:
return None
return schema2_config.has_empty_layer
def _populate_schema1_builder(self, v1_builder, content_retriever):
""" Populates a DockerSchema1ManifestBuilder with the layers and config from
this schema.

View file

@ -1,4 +1,8 @@
import json
import tarfile
from cachetools import lru_cache
from io import BytesIO
from image.docker.interfaces import ContentRetriever
@ -22,3 +26,12 @@ class ContentRetrieverForTesting(ContentRetriever):
digests = {}
digests[digest] = padded_string
return ContentRetrieverForTesting(digests)
@lru_cache(maxsize=1)
def generate_empty_layer_data():
""" Generates the layer data for an "empty" layer. """
with BytesIO() as f:
tar_file = tarfile.open(fileobj=f, mode='w|gw')
tar_file.close()
return f.getvalue()

View file

@ -24,7 +24,6 @@ def basic_images():
]
@pytest.fixture(scope="session")
def different_images():
""" Returns different basic images for push and pull testing. """
@ -37,7 +36,6 @@ def different_images():
]
@pytest.fixture(scope="session")
def sized_images():
""" Returns basic images (with sizes) for push and pull testing. """
@ -106,6 +104,24 @@ def remote_images():
]
@pytest.fixture(scope="session")
def images_with_empty_layer():
""" Returns images for push and pull testing that contain an empty layer. """
# Note: order is from base layer down to leaf.
parent_bytes = layer_bytes_for_contents('parent contents')
empty_bytes = layer_bytes_for_contents('', empty=True)
image_bytes = layer_bytes_for_contents('some contents')
middle_bytes = layer_bytes_for_contents('middle')
return [
Image(id='parentid', bytes=parent_bytes, parent_id=None),
Image(id='emptyid', bytes=empty_bytes, parent_id='parentid', is_empty=True),
Image(id='middleid', bytes=middle_bytes, parent_id='emptyid'),
Image(id='emptyid2', bytes=empty_bytes, parent_id='middleid', is_empty=True),
Image(id='someid', bytes=image_bytes, parent_id='emptyid2'),
]
@pytest.fixture(scope="session")
def jwk():
return RSAKey(key=RSA.generate(2048))
@ -161,10 +177,10 @@ def legacy_pusher(request, data_model, jwk):
@pytest.fixture(params=['v1', 'v2_1', 'v2_2'])
def puller(request, data_model, jwk):
if request == 'v1':
if request.param == 'v1':
return V1Protocol(jwk)
if request == 'v2_2' and data_model == 'oci_model':
if request.param == 'v2_2' and data_model == 'oci_model':
return V2Protocol(jwk, schema2=True)
return V2Protocol(jwk)

View file

@ -249,7 +249,21 @@ class V2Protocol(RegistryProtocol):
if options.manifest_invalid_blob_references:
checksum = 'sha256:' + hashlib.sha256('notarealthing').hexdigest()
builder.add_layer(checksum, len(image.bytes), urls=image.urls)
if not image.is_empty:
builder.add_layer(checksum, len(image.bytes), urls=image.urls)
def history_for_image(image):
history = {
'created': '2018-04-03T18:37:09.284840891Z',
'created_by': (('/bin/sh -c #(nop) ENTRYPOINT %s' % image.config['Entrypoint'])
if image.config and image.config.get('Entrypoint')
else '/bin/sh -c #(nop) %s' % image.id),
}
if image.is_empty:
history['empty_layer'] = True
return history
config = {
"os": "linux",
@ -257,12 +271,7 @@ class V2Protocol(RegistryProtocol):
"type": "layers",
"diff_ids": []
},
"history": [{
'created': '2018-04-03T18:37:09.284840891Z',
'created_by': (('/bin/sh -c #(nop) ENTRYPOINT %s' % image.config['Entrypoint'])
if image.config and image.config.get('Entrypoint')
else '/bin/sh -c #(nop) %s' % image.id),
} for image in images],
"history": [history_for_image(image) for image in images],
}
if images[-1].config:
@ -535,17 +544,28 @@ class V2Protocol(RegistryProtocol):
image_ids[tag_name] = manifest.leaf_layer_v1_image_id
# Verify the layers.
for index, layer in enumerate(manifest.layers):
layer_index = 0
empty_count = 0
for image in images:
if manifest.schema_version == 2 and image.is_empty:
empty_count += 1
continue
# If the layer is remote, then we expect the blob to *not* exist in the system.
expected_status = 404 if images[index].urls else 200
layer = manifest.layers[layer_index]
expected_status = 404 if image.urls else 200
result = self.conduct(session, 'GET',
'/v2/%s/blobs/%s' % (self.repo_name(namespace, repo_name),
layer.digest),
expected_status=expected_status,
headers=headers)
if expected_status == 200:
assert result.content == images[index].bytes
assert result.content == image.bytes
layer_index += 1
assert (len(manifest.layers) + empty_count) == len(images)
return PullResult(manifests=manifests, image_ids=image_ids)

View file

@ -7,14 +7,20 @@ from cStringIO import StringIO
from enum import Enum, unique
from six import add_metaclass
Image = namedtuple('Image', ['id', 'parent_id', 'bytes', 'size', 'config', 'created', 'urls'])
Image.__new__.__defaults__ = (None, None, None, None)
from image.docker.schema2 import EMPTY_LAYER_BYTES
Image = namedtuple('Image', ['id', 'parent_id', 'bytes', 'size', 'config', 'created', 'urls',
'is_empty'])
Image.__new__.__defaults__ = (None, None, None, None, False)
PushResult = namedtuple('PushResult', ['manifests', 'headers'])
PullResult = namedtuple('PullResult', ['manifests', 'image_ids'])
def layer_bytes_for_contents(contents, mode='|gz', other_files=None):
def layer_bytes_for_contents(contents, mode='|gz', other_files=None, empty=False):
if empty:
return EMPTY_LAYER_BYTES
layer_data = StringIO()
tar_file = tarfile.open(fileobj=layer_data, mode='w' + mode)

View file

@ -39,6 +39,19 @@ def test_basic_push_pull(pusher, puller, basic_images, liveserver_session, app_r
credentials=credentials)
def test_empty_layer(pusher, puller, images_with_empty_layer, liveserver_session, app_reloader):
""" Test: Push and pull of an image with an empty layer to a new repository. """
credentials = ('devtable', 'password')
# Push a new repository.
pusher.push(liveserver_session, 'devtable', 'newrepo', 'latest', images_with_empty_layer,
credentials=credentials)
# Pull the repository to verify.
puller.pull(liveserver_session, 'devtable', 'newrepo', 'latest', images_with_empty_layer,
credentials=credentials)
def test_multi_layer_images_push_pull(pusher, puller, multi_layer_images, liveserver_session,
app_reloader):
""" Test: Basic push and pull of a multi-layered image to a new repository. """