diff --git a/data/model/blob.py b/data/model/blob.py
index 6f6ff1f14..7c09c1334 100644
--- a/data/model/blob.py
+++ b/data/model/blob.py
@@ -1,3 +1,5 @@
+import logging
+
 from datetime import datetime
 from uuid import uuid4
 
@@ -7,6 +9,9 @@ from data.database import (Repository, Namespace, ImageStorage, Image, ImageStor
                            BlobUpload, ImageStorageLocation, db_random_func)
 
 
+logger = logging.getLogger(__name__)
+
+
 def get_repository_blob_by_digest(repository, blob_digest):
   """ Find the content-addressable blob linked to the specified repository.
   """
@@ -157,3 +162,31 @@ def initiate_upload(namespace, repo_name, uuid, location_name, storage_metadata)
   location = storage_model.get_image_location_for_name(location_name)
   return BlobUpload.create(repository=repo, location=location.id, uuid=uuid,
                            storage_metadata=storage_metadata)
+
+
+def get_or_create_shared_blob(digest, byte_data, storage):
+  """ Returns the ImageStorage blob with the given digest or, if not present,
+      adds a row and writes the given byte data to the storage engine.
+      This method is *only* to be used for shared blobs that are globally
+      accessible, such as the special empty gzipped tar layer that Docker
+      no longer pushes to us.
+  """
+  try:
+    return ImageStorage.get(content_checksum=digest, uploading=False)
+  except ImageStorage.DoesNotExist:
+    record = ImageStorage.create(image_size=len(byte_data), content_checksum=digest,
+                                 cas_path=True, uploading=True)
+    preferred = storage.preferred_locations[0]
+    location_obj = ImageStorageLocation.get(name=preferred)
+    try:
+      storage.put_content([preferred], storage_model.get_layer_path(record), byte_data)
+      ImageStoragePlacement.create(storage=record, location=location_obj)
+
+      record.uploading = False
+      record.save()
+    except:
+      logger.exception('Exception when trying to write special layer %s', digest)
+      record.delete_instance()
+      raise
+
+    return record
diff --git a/data/model/oci/manifest.py b/data/model/oci/manifest.py
index de20e049e..3181ef5e7 100644
--- a/data/model/oci/manifest.py
+++ b/data/model/oci/manifest.py
@@ -7,11 +7,13 @@ from peewee import IntegrityError, JOIN
 from data.database import (Tag, Manifest, ManifestBlob, ManifestLegacyImage, ManifestChild,
                            db_transaction)
 from data.model import BlobDoesNotExist
+from data.model.blob import get_or_create_shared_blob
 from data.model.oci.tag import filter_to_alive_tags
 from data.model.oci.label import create_manifest_label
 from data.model.oci.retriever import RepositoryContentRetriever
 from data.model.storage import lookup_repo_storages_by_content_checksum
 from data.model.image import lookup_repository_images, get_image, synthesize_v1_image
+from image.docker.schema2 import EMPTY_LAYER_BLOB_DIGEST, EMPTY_LAYER_BYTES
 from image.docker.schema1 import ManifestException
 from image.docker.schema2.list import MalformedSchema2ManifestList
 from util.validation import is_json
@@ -121,6 +123,15 @@ def _create_manifest(repository_id, manifest_interface_instance, storage):
                          manifest_interface_instance.digest, repository_id)
       return None
 
+  # Special check: If the empty layer blob is needed for this manifest, add it to the
+  # blob map. This is necessary because Docker decided to elide sending of this special
+  # empty layer in schema version 2, but we need to have it referenced for GC and schema version 1.
+  if manifest_interface_instance.get_requires_empty_layer_blob(retriever):
+    shared_blob = get_or_create_shared_blob(EMPTY_LAYER_BLOB_DIGEST, EMPTY_LAYER_BYTES, storage)
+    assert not shared_blob.uploading
+    assert shared_blob.content_checksum == EMPTY_LAYER_BLOB_DIGEST
+    blob_map[EMPTY_LAYER_BLOB_DIGEST] = shared_blob
+
   # Determine and populate the legacy image if necessary. Manifest lists will not have a legacy
   # image.
   legacy_image = None
@@ -214,10 +225,11 @@ def _populate_legacy_image(repository_id, manifest_interface_instance, blob_map,
       if parent_image is None:
         return None
 
+    storage_reference = blob_map[rewritten_image.content_checksum]
     synthesized = synthesize_v1_image(
       repository_id,
-      blob_map[rewritten_image.content_checksum].id,
-      blob_map[rewritten_image.content_checksum].image_size,
+      storage_reference.id,
+      storage_reference.image_size,
       rewritten_image.image_id,
       rewritten_image.created,
       rewritten_image.comment,
diff --git a/image/docker/interfaces.py b/image/docker/interfaces.py
index 3fd498a79..909ef1940 100644
--- a/image/docker/interfaces.py
+++ b/image/docker/interfaces.py
@@ -82,6 +82,11 @@ class ManifestInterface(object):
         of manifest does not support labels. """
     pass
 
+  @abstractmethod
+  def get_requires_empty_layer_blob(self, content_retriever):
+    """ Whether this schema requires the special empty layer blob. """
+    pass
+
   @abstractmethod
   def unsigned(self):
     """ Returns an unsigned version of this manifest. """
diff --git a/image/docker/schema1.py b/image/docker/schema1.py
index 7c5f1a3a2..4523df0d2 100644
--- a/image/docker/schema1.py
+++ b/image/docker/schema1.py
@@ -312,6 +312,9 @@ class DockerSchema1Manifest(ManifestInterface):
   def get_manifest_labels(self, content_retriever):
     return self.layers[-1].v1_metadata.labels
 
+  def get_requires_empty_layer_blob(self, content_retriever):
+    return False
+
   def unsigned(self):
     if self.media_type == DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE:
       return self
diff --git a/image/docker/schema2/__init__.py b/image/docker/schema2/__init__.py
index 6d3392c41..8477596ae 100644
--- a/image/docker/schema2/__init__.py
+++ b/image/docker/schema2/__init__.py
@@ -19,3 +19,12 @@ OCI_MANIFESTLIST_CONTENT_TYPE = 'application/vnd.oci.image.index.v1+json'
 DOCKER_SCHEMA2_CONTENT_TYPES = {DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE,
                                 DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE}
 OCI_CONTENT_TYPES = {OCI_MANIFEST_CONTENT_TYPE, OCI_MANIFESTLIST_CONTENT_TYPE}
+
+# The magical digest to be used for "empty" layers.
+# https://github.com/docker/distribution/blob/749f6afb4572201e3c37325d0ffedb6f32be8950/manifest/schema1/config_builder.go#L22
+EMPTY_LAYER_BLOB_DIGEST = 'sha256:a3ed95caeb02ffe68cdd9fd84406680ae93d633cb16422d00e8a7c22955b46d4'
+EMPTY_LAYER_SIZE = 32
+EMPTY_LAYER_BYTES = "".join(map(chr, [
+  31, 139, 8, 0, 0, 9, 110, 136, 0, 255, 98, 24, 5, 163, 96, 20, 140, 88,
+  0, 8, 0, 0, 255, 255, 46, 175, 181, 239, 0, 4, 0, 0,
+]))
diff --git a/image/docker/schema2/config.py b/image/docker/schema2/config.py
index ea8551f38..d6a6bf81b 100644
--- a/image/docker/schema2/config.py
+++ b/image/docker/schema2/config.py
@@ -205,6 +205,15 @@ class DockerSchema2Config(object):
     """ Returns a dictionary of all the labels defined in this configuration. """
     return self._parsed.get('config', {}).get('Labels', {}) or {}
 
+  @property
+  def has_empty_layer(self):
+    """ Returns whether this config contains an empty layer. """
+    for history_entry in self._parsed[DOCKER_SCHEMA2_CONFIG_HISTORY_KEY]:
+      if history_entry.get(DOCKER_SCHEMA2_CONFIG_EMPTY_LAYER_KEY, False):
+        return True
+
+    return False
+
   @property
   def history(self):
     """ Returns the history of the image, started at the base layer. """
diff --git a/image/docker/schema2/list.py b/image/docker/schema2/list.py
index 9f153ddbb..f02c55072 100644
--- a/image/docker/schema2/list.py
+++ b/image/docker/schema2/list.py
@@ -255,6 +255,9 @@ class DockerSchema2ManifestList(ManifestInterface):
   def has_legacy_image(self):
     return False
 
+  def get_requires_empty_layer_blob(self, content_retriever):
+    return False
+
   def get_schema1_manifest(self, namespace_name, repo_name, tag_name, content_retriever):
     """ Returns the manifest that is compatible with V1, by virtue of being `amd64` and `linux`.
         If none, returns None.
diff --git a/image/docker/schema2/manifest.py b/image/docker/schema2/manifest.py
index 1c26acd52..ba7e3e84c 100644
--- a/image/docker/schema2/manifest.py
+++ b/image/docker/schema2/manifest.py
@@ -11,7 +11,8 @@ from image.docker.interfaces import ManifestInterface
 from image.docker.schema2 import (DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE,
                                   DOCKER_SCHEMA2_CONFIG_CONTENT_TYPE,
                                   DOCKER_SCHEMA2_LAYER_CONTENT_TYPE,
-                                  DOCKER_SCHEMA2_REMOTE_LAYER_CONTENT_TYPE)
+                                  DOCKER_SCHEMA2_REMOTE_LAYER_CONTENT_TYPE,
+                                  EMPTY_LAYER_BLOB_DIGEST, EMPTY_LAYER_SIZE)
 from image.docker.schema1 import DockerSchema1ManifestBuilder
 from image.docker.schema2.config import DockerSchema2Config
 
@@ -34,8 +35,6 @@ ManifestImageLayer = namedtuple('ManifestImageLayer', ['history', 'blob_layer',
                                                        'v1_parent_id', 'compressed_size',
                                                        'blob_digest'])
 
-EMPTY_BLOB_DIGEST = 'sha256:a3ed95caeb02ffe68cdd9fd84406680ae93d633cb16422d00e8a7c22955b46d4'
-
 logger = logging.getLogger(__name__)
 
 class MalformedSchema2Manifest(ManifestException):
@@ -233,8 +232,8 @@ class DockerSchema2Manifest(ManifestInterface):
 
       v1_layer_parent_id = v1_layer_id
       blob_layer = None if history_entry.is_empty else self.layers[blob_index]
-      blob_digest = EMPTY_BLOB_DIGEST if blob_layer is None else str(blob_layer.digest)
-      compressed_size = 0 if blob_layer is None else blob_layer.compressed_size
+      blob_digest = EMPTY_LAYER_BLOB_DIGEST if blob_layer is None else str(blob_layer.digest)
+      compressed_size = EMPTY_LAYER_SIZE if blob_layer is None else blob_layer.compressed_size
 
       # Create a new synthesized V1 ID for the history layer by hashing its content and
       # the blob associated withn it.
@@ -295,6 +294,14 @@ class DockerSchema2Manifest(ManifestInterface):
   def unsigned(self):
     return self
 
+  def get_requires_empty_layer_blob(self, content_retriever):
+    schema2_config = self._get_built_config(content_retriever)
+    if schema2_config is None:
+      # No config means no history to inspect; answer with a real boolean like the other schemas.
+      return False
+
+    return schema2_config.has_empty_layer
+
   def _populate_schema1_builder(self, v1_builder, content_retriever):
     """ Populates a DockerSchema1ManifestBuilder with the layers and config from
         this schema.
diff --git a/image/docker/schemautil.py b/image/docker/schemautil.py
index 1a231c62c..faf123c2b 100644
--- a/image/docker/schemautil.py
+++ b/image/docker/schemautil.py
@@ -1,4 +1,8 @@
 import json
+import tarfile
+
+from cachetools import lru_cache
+from io import BytesIO
 
 from image.docker.interfaces import ContentRetriever
 
@@ -22,3 +26,13 @@ class ContentRetrieverForTesting(ContentRetriever):
       digests = {}
     digests[digest] = padded_string
     return ContentRetrieverForTesting(digests)
+
+
+@lru_cache(maxsize=1)
+def generate_empty_layer_data():
+  """ Generates the layer data for an "empty" layer. """
+  with BytesIO() as f:
+    # 'w|gz' is tarfile's gzip-compressed stream-write mode; any other suffix is rejected.
+    tar_file = tarfile.open(fileobj=f, mode='w|gz')
+    tar_file.close()
+    return f.getvalue()
diff --git a/test/registry/protocol_fixtures.py b/test/registry/protocol_fixtures.py
index a418e90de..0b7fd2b44 100644
--- a/test/registry/protocol_fixtures.py
+++ b/test/registry/protocol_fixtures.py
@@ -24,7 +24,6 @@ def basic_images():
   ]
 
 
-
 @pytest.fixture(scope="session")
 def different_images():
   """ Returns different basic images for push and pull testing. """
@@ -37,7 +36,6 @@ def different_images():
   ]
 
 
-
 @pytest.fixture(scope="session")
 def sized_images():
   """ Returns basic images (with sizes) for push and pull testing. """
@@ -106,6 +104,24 @@ def remote_images():
   ]
 
 
+@pytest.fixture(scope="session")
+def images_with_empty_layer():
+  """ Returns images for push and pull testing that contain an empty layer. """
+  # Note: order is from base layer down to leaf.
+  parent_bytes = layer_bytes_for_contents('parent contents')
+  empty_bytes = layer_bytes_for_contents('', empty=True)
+  image_bytes = layer_bytes_for_contents('some contents')
+  middle_bytes = layer_bytes_for_contents('middle')
+
+  return [
+    Image(id='parentid', bytes=parent_bytes, parent_id=None),
+    Image(id='emptyid', bytes=empty_bytes, parent_id='parentid', is_empty=True),
+    Image(id='middleid', bytes=middle_bytes, parent_id='emptyid'),
+    Image(id='emptyid2', bytes=empty_bytes, parent_id='middleid', is_empty=True),
+    Image(id='someid', bytes=image_bytes, parent_id='emptyid2'),
+  ]
+
+
 @pytest.fixture(scope="session")
 def jwk():
   return RSAKey(key=RSA.generate(2048))
@@ -161,10 +177,10 @@ def legacy_pusher(request, data_model, jwk):
 
 @pytest.fixture(params=['v1', 'v2_1', 'v2_2'])
 def puller(request, data_model, jwk):
-  if request == 'v1':
+  if request.param == 'v1':
     return V1Protocol(jwk)
 
-  if request == 'v2_2' and data_model == 'oci_model':
+  if request.param == 'v2_2' and data_model == 'oci_model':
     return V2Protocol(jwk, schema2=True)
 
   return V2Protocol(jwk)
diff --git a/test/registry/protocol_v2.py b/test/registry/protocol_v2.py
index 9309d5b4a..a79dc4101 100644
--- a/test/registry/protocol_v2.py
+++ b/test/registry/protocol_v2.py
@@ -249,7 +249,21 @@ class V2Protocol(RegistryProtocol):
       if options.manifest_invalid_blob_references:
         checksum = 'sha256:' + hashlib.sha256('notarealthing').hexdigest()
 
-      builder.add_layer(checksum, len(image.bytes), urls=image.urls)
+      if not image.is_empty:
+        builder.add_layer(checksum, len(image.bytes), urls=image.urls)
+
+    def history_for_image(image):
+      history = {
+        'created': '2018-04-03T18:37:09.284840891Z',
+        'created_by': (('/bin/sh -c #(nop) ENTRYPOINT %s' % image.config['Entrypoint'])
+                       if image.config and image.config.get('Entrypoint')
+                       else '/bin/sh -c #(nop) %s' % image.id),
+      }
+
+      if image.is_empty:
+        history['empty_layer'] = True
+
+      return history
 
     config = {
       "os": "linux",
@@ -257,12 +271,7 @@ class V2Protocol(RegistryProtocol):
         "type": "layers",
         "diff_ids": []
       },
-      "history": [{
-        'created': '2018-04-03T18:37:09.284840891Z',
-        'created_by': (('/bin/sh -c #(nop) ENTRYPOINT %s' % image.config['Entrypoint'])
-                       if image.config and image.config.get('Entrypoint')
-                       else '/bin/sh -c #(nop) %s' % image.id),
-      } for image in images],
+      "history": [history_for_image(image) for image in images],
     }
 
     if images[-1].config:
@@ -535,17 +544,28 @@ class V2Protocol(RegistryProtocol):
       image_ids[tag_name] = manifest.leaf_layer_v1_image_id
 
       # Verify the layers.
-      for index, layer in enumerate(manifest.layers):
+      layer_index = 0
+      empty_count = 0
+      for image in images:
+        if manifest.schema_version == 2 and image.is_empty:
+          empty_count += 1
+          continue
+
         # If the layer is remote, then we expect the blob to *not* exist in the system.
-        expected_status = 404 if images[index].urls else 200
+        layer = manifest.layers[layer_index]
+        expected_status = 404 if image.urls else 200
         result = self.conduct(session, 'GET',
                               '/v2/%s/blobs/%s' % (self.repo_name(namespace, repo_name),
                                                    layer.digest),
                               expected_status=expected_status,
                               headers=headers)
-        
+
         if expected_status == 200:
-          assert result.content == images[index].bytes
+          assert result.content == image.bytes
+
+        layer_index += 1
+
+      assert (len(manifest.layers) + empty_count) == len(images)
 
     return PullResult(manifests=manifests, image_ids=image_ids)
 
diff --git a/test/registry/protocols.py b/test/registry/protocols.py
index 0f167528a..fa68b47d9 100644
--- a/test/registry/protocols.py
+++ b/test/registry/protocols.py
@@ -7,14 +7,20 @@ from cStringIO import StringIO
 from enum import Enum, unique
 from six import add_metaclass
 
-Image = namedtuple('Image', ['id', 'parent_id', 'bytes', 'size', 'config', 'created', 'urls'])
-Image.__new__.__defaults__ = (None, None, None, None)
+from image.docker.schema2 import EMPTY_LAYER_BYTES
+
+Image = namedtuple('Image', ['id', 'parent_id', 'bytes', 'size', 'config', 'created', 'urls',
+                             'is_empty'])
+Image.__new__.__defaults__ = (None, None, None, None, False)
 
 PushResult = namedtuple('PushResult', ['manifests', 'headers'])
 PullResult = namedtuple('PullResult', ['manifests', 'image_ids'])
 
 
-def layer_bytes_for_contents(contents, mode='|gz', other_files=None):
+def layer_bytes_for_contents(contents, mode='|gz', other_files=None, empty=False):
+  if empty:
+    return EMPTY_LAYER_BYTES
+
   layer_data = StringIO()
   tar_file = tarfile.open(fileobj=layer_data, mode='w' + mode)
 
diff --git a/test/registry/registry_tests.py b/test/registry/registry_tests.py
index 0c40c77ea..24aaedadd 100644
--- a/test/registry/registry_tests.py
+++ b/test/registry/registry_tests.py
@@ -39,6 +39,19 @@ def test_basic_push_pull(pusher, puller, basic_images, liveserver_session, app_r
               credentials=credentials)
 
 
+def test_empty_layer(pusher, puller, images_with_empty_layer, liveserver_session, app_reloader):
+  """ Test: Push and pull of an image with an empty layer to a new repository. """
+  credentials = ('devtable', 'password')
+
+  # Push a new repository.
+  pusher.push(liveserver_session, 'devtable', 'newrepo', 'latest', images_with_empty_layer,
+              credentials=credentials)
+
+  # Pull the repository to verify.
+  puller.pull(liveserver_session, 'devtable', 'newrepo', 'latest', images_with_empty_layer,
+              credentials=credentials)
+
+
 def test_multi_layer_images_push_pull(pusher, puller, multi_layer_images, liveserver_session,
                                       app_reloader):
   """ Test: Basic push and pull of a multi-layered image to a new repository. """