Properly handle the empty layer when pushing schema 2 manifests
Docker doesn't send us the contents of this layer, so we are forced to synthesize it ourselves
This commit is contained in:
parent
947c029afa
commit
4985040d31
13 changed files with 173 additions and 25 deletions
|
@ -1,3 +1,5 @@
|
|||
import logging
|
||||
|
||||
from datetime import datetime
|
||||
from uuid import uuid4
|
||||
|
||||
|
@ -7,6 +9,9 @@ from data.database import (Repository, Namespace, ImageStorage, Image, ImageStor
|
|||
BlobUpload, ImageStorageLocation, db_random_func)
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_repository_blob_by_digest(repository, blob_digest):
|
||||
""" Find the content-addressable blob linked to the specified repository.
|
||||
"""
|
||||
|
@ -157,3 +162,31 @@ def initiate_upload(namespace, repo_name, uuid, location_name, storage_metadata)
|
|||
location = storage_model.get_image_location_for_name(location_name)
|
||||
return BlobUpload.create(repository=repo, location=location.id, uuid=uuid,
|
||||
storage_metadata=storage_metadata)
|
||||
|
||||
|
||||
def get_or_create_shared_blob(digest, byte_data, storage):
    """ Returns the ImageStorage blob with the given digest or, if not present,
        adds a row and writes the given byte data to the storage engine.

        This method is *only* to be used for shared blobs that are globally
        accessible, such as the special empty gzipped tar layer that Docker
        no longer pushes to us.

        `storage` must expose `preferred_locations` and `put_content`. If any step
        after the row has been created fails, the failure is logged, the new row
        is deleted and the original exception is re-raised.
    """
    try:
        return ImageStorage.get(content_checksum=digest, uploading=False)
    except ImageStorage.DoesNotExist:
        pass

    # The row starts out flagged as uploading and is only cleared once the bytes
    # and the placement have both been written.
    record = ImageStorage.create(image_size=len(byte_data), content_checksum=digest,
                                 cas_path=True, uploading=True)
    try:
        # The location lookup lives inside the guarded region so that a missing or
        # misconfigured preferred location does not leave the new row behind.
        preferred = storage.preferred_locations[0]
        location_obj = ImageStorageLocation.get(name=preferred)

        storage.put_content([preferred], storage_model.get_layer_path(record), byte_data)
        ImageStoragePlacement.create(storage=record, location=location_obj)

        record.uploading = False
        record.save()
    except:
        # Deliberately broad: the exception is always re-raised after cleanup.
        logger.exception('Exception when trying to write special layer %s', digest)
        record.delete_instance()
        raise

    return record
|
||||
|
|
|
@ -7,11 +7,13 @@ from peewee import IntegrityError, JOIN
|
|||
from data.database import (Tag, Manifest, ManifestBlob, ManifestLegacyImage, ManifestChild,
|
||||
db_transaction)
|
||||
from data.model import BlobDoesNotExist
|
||||
from data.model.blob import get_or_create_shared_blob
|
||||
from data.model.oci.tag import filter_to_alive_tags
|
||||
from data.model.oci.label import create_manifest_label
|
||||
from data.model.oci.retriever import RepositoryContentRetriever
|
||||
from data.model.storage import lookup_repo_storages_by_content_checksum
|
||||
from data.model.image import lookup_repository_images, get_image, synthesize_v1_image
|
||||
from image.docker.schema2 import EMPTY_LAYER_BLOB_DIGEST, EMPTY_LAYER_BYTES
|
||||
from image.docker.schema1 import ManifestException
|
||||
from image.docker.schema2.list import MalformedSchema2ManifestList
|
||||
from util.validation import is_json
|
||||
|
@ -121,6 +123,15 @@ def _create_manifest(repository_id, manifest_interface_instance, storage):
|
|||
manifest_interface_instance.digest, repository_id)
|
||||
return None
|
||||
|
||||
# Special check: If the empty layer blob is needed for this manifest, add it to the
|
||||
# blob map. This is necessary because Docker decided to elide sending of this special
|
||||
# empty layer in schema version 2, but we need to have it referenced for GC and schema version 1.
|
||||
if manifest_interface_instance.get_requires_empty_layer_blob(retriever):
|
||||
shared_blob = get_or_create_shared_blob(EMPTY_LAYER_BLOB_DIGEST, EMPTY_LAYER_BYTES, storage)
|
||||
assert not shared_blob.uploading
|
||||
assert shared_blob.content_checksum == EMPTY_LAYER_BLOB_DIGEST
|
||||
blob_map[EMPTY_LAYER_BLOB_DIGEST] = shared_blob
|
||||
|
||||
# Determine and populate the legacy image if necessary. Manifest lists will not have a legacy
|
||||
# image.
|
||||
legacy_image = None
|
||||
|
@ -214,10 +225,11 @@ def _populate_legacy_image(repository_id, manifest_interface_instance, blob_map,
|
|||
if parent_image is None:
|
||||
return None
|
||||
|
||||
storage_reference = blob_map[rewritten_image.content_checksum]
|
||||
synthesized = synthesize_v1_image(
|
||||
repository_id,
|
||||
blob_map[rewritten_image.content_checksum].id,
|
||||
blob_map[rewritten_image.content_checksum].image_size,
|
||||
storage_reference.id,
|
||||
storage_reference.image_size,
|
||||
rewritten_image.image_id,
|
||||
rewritten_image.created,
|
||||
rewritten_image.comment,
|
||||
|
|
|
@ -82,6 +82,11 @@ class ManifestInterface(object):
|
|||
of manifest does not support labels. """
|
||||
pass
|
||||
|
||||
@abstractmethod
def get_requires_empty_layer_blob(self, content_retriever):
    """ Returns whether this schema needs the special empty layer blob.

        Implementations receive the content retriever as their only argument.
    """
|
||||
|
||||
@abstractmethod
|
||||
def unsigned(self):
|
||||
""" Returns an unsigned version of this manifest. """
|
||||
|
|
|
@ -312,6 +312,9 @@ class DockerSchema1Manifest(ManifestInterface):
|
|||
def get_manifest_labels(self, content_retriever):
|
||||
return self.layers[-1].v1_metadata.labels
|
||||
|
||||
def get_requires_empty_layer_blob(self, content_retriever):
    """ Schema 1 manifests never require the special empty layer blob. """
    return False
|
||||
|
||||
def unsigned(self):
|
||||
if self.media_type == DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE:
|
||||
return self
|
||||
|
|
|
@ -19,3 +19,12 @@ OCI_MANIFESTLIST_CONTENT_TYPE = 'application/vnd.oci.image.index.v1+json'
|
|||
DOCKER_SCHEMA2_CONTENT_TYPES = {DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE,
|
||||
DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE}
|
||||
OCI_CONTENT_TYPES = {OCI_MANIFEST_CONTENT_TYPE, OCI_MANIFESTLIST_CONTENT_TYPE}
|
||||
|
||||
# The magical digest to be used for "empty" layers.
# https://github.com/docker/distribution/blob/749f6afb4572201e3c37325d0ffedb6f32be8950/manifest/schema1/config_builder.go#L22
EMPTY_LAYER_BLOB_DIGEST = 'sha256:a3ed95caeb02ffe68cdd9fd84406680ae93d633cb16422d00e8a7c22955b46d4'

# Length, in bytes, of EMPTY_LAYER_BYTES below.
EMPTY_LAYER_SIZE = 32

# Raw contents of the empty layer. Built through bytearray so that the value is
# a byte string on both Python 2 (where it equals the previous chr()-joined str)
# and Python 3 (where joining chr() results would produce text, not bytes).
EMPTY_LAYER_BYTES = bytes(bytearray([
    31, 139, 8, 0, 0, 9, 110, 136, 0, 255, 98, 24, 5, 163, 96, 20, 140, 88,
    0, 8, 0, 0, 255, 255, 46, 175, 181, 239, 0, 4, 0, 0,
]))
|
||||
|
|
|
@ -205,6 +205,15 @@ class DockerSchema2Config(object):
|
|||
""" Returns a dictionary of all the labels defined in this configuration. """
|
||||
return self._parsed.get('config', {}).get('Labels', {}) or {}
|
||||
|
||||
@property
def has_empty_layer(self):
    """ Returns whether any history entry of this config is flagged as an empty layer. """
    history_entries = self._parsed[DOCKER_SCHEMA2_CONFIG_HISTORY_KEY]
    return any(entry.get(DOCKER_SCHEMA2_CONFIG_EMPTY_LAYER_KEY, False)
               for entry in history_entries)
|
||||
|
||||
@property
|
||||
def history(self):
|
||||
""" Returns the history of the image, started at the base layer. """
|
||||
|
|
|
@ -255,6 +255,9 @@ class DockerSchema2ManifestList(ManifestInterface):
|
|||
def has_legacy_image(self):
|
||||
return False
|
||||
|
||||
def get_requires_empty_layer_blob(self, content_retriever):
    """ Manifest lists never require the special empty layer blob. """
    return False
|
||||
|
||||
def get_schema1_manifest(self, namespace_name, repo_name, tag_name, content_retriever):
|
||||
""" Returns the manifest that is compatible with V1, by virtue of being `amd64` and `linux`.
|
||||
If none, returns None.
|
||||
|
|
|
@ -11,7 +11,8 @@ from image.docker.interfaces import ManifestInterface
|
|||
from image.docker.schema2 import (DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE,
|
||||
DOCKER_SCHEMA2_CONFIG_CONTENT_TYPE,
|
||||
DOCKER_SCHEMA2_LAYER_CONTENT_TYPE,
|
||||
DOCKER_SCHEMA2_REMOTE_LAYER_CONTENT_TYPE)
|
||||
DOCKER_SCHEMA2_REMOTE_LAYER_CONTENT_TYPE,
|
||||
EMPTY_LAYER_BLOB_DIGEST, EMPTY_LAYER_SIZE)
|
||||
from image.docker.schema1 import DockerSchema1ManifestBuilder
|
||||
from image.docker.schema2.config import DockerSchema2Config
|
||||
|
||||
|
@ -34,8 +35,6 @@ ManifestImageLayer = namedtuple('ManifestImageLayer', ['history', 'blob_layer',
|
|||
'v1_parent_id', 'compressed_size',
|
||||
'blob_digest'])
|
||||
|
||||
EMPTY_BLOB_DIGEST = 'sha256:a3ed95caeb02ffe68cdd9fd84406680ae93d633cb16422d00e8a7c22955b46d4'
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class MalformedSchema2Manifest(ManifestException):
|
||||
|
@ -233,8 +232,8 @@ class DockerSchema2Manifest(ManifestInterface):
|
|||
|
||||
v1_layer_parent_id = v1_layer_id
|
||||
blob_layer = None if history_entry.is_empty else self.layers[blob_index]
|
||||
blob_digest = EMPTY_BLOB_DIGEST if blob_layer is None else str(blob_layer.digest)
|
||||
compressed_size = 0 if blob_layer is None else blob_layer.compressed_size
|
||||
blob_digest = EMPTY_LAYER_BLOB_DIGEST if blob_layer is None else str(blob_layer.digest)
|
||||
compressed_size = EMPTY_LAYER_SIZE if blob_layer is None else blob_layer.compressed_size
|
||||
|
||||
# Create a new synthesized V1 ID for the history layer by hashing its content and
|
||||
# the blob associated with it.
|
||||
|
@ -295,6 +294,13 @@ class DockerSchema2Manifest(ManifestInterface):
|
|||
def unsigned(self):
|
||||
return self
|
||||
|
||||
def get_requires_empty_layer_blob(self, content_retriever):
    """ Returns whether this manifest requires the special empty layer blob.

        The answer comes from the built config's `has_empty_layer`. When no config
        can be built, False is returned so that callers always get a boolean-like
        answer, matching the other manifest implementations.
    """
    schema2_config = self._get_built_config(content_retriever)
    if schema2_config is None:
        return False

    return schema2_config.has_empty_layer
|
||||
|
||||
def _populate_schema1_builder(self, v1_builder, content_retriever):
|
||||
""" Populates a DockerSchema1ManifestBuilder with the layers and config from
|
||||
this schema.
|
||||
|
|
|
@ -1,4 +1,8 @@
|
|||
import json
|
||||
import tarfile
|
||||
|
||||
from cachetools import lru_cache
|
||||
from io import BytesIO
|
||||
|
||||
from image.docker.interfaces import ContentRetriever
|
||||
|
||||
|
@ -22,3 +26,12 @@ class ContentRetrieverForTesting(ContentRetriever):
|
|||
digests = {}
|
||||
digests[digest] = padded_string
|
||||
return ContentRetrieverForTesting(digests)
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
def generate_empty_layer_data():
    """ Generates the layer data for an "empty" layer.

        The result is the byte content of a gzip-compressed tar stream that was
        opened and closed without adding any members. It is cached after the
        first call.
    """
    with BytesIO() as f:
        # 'w|gz' selects a gzip-compressed write stream; any other suffix (such as
        # the previous 'gw' typo) makes tarfile raise a CompressionError.
        tar_file = tarfile.open(fileobj=f, mode='w|gz')
        tar_file.close()
        return f.getvalue()
|
||||
|
|
|
@ -24,7 +24,6 @@ def basic_images():
|
|||
]
|
||||
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def different_images():
|
||||
""" Returns different basic images for push and pull testing. """
|
||||
|
@ -37,7 +36,6 @@ def different_images():
|
|||
]
|
||||
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def sized_images():
|
||||
""" Returns basic images (with sizes) for push and pull testing. """
|
||||
|
@ -106,6 +104,24 @@ def remote_images():
|
|||
]
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def images_with_empty_layer():
    """ Returns a chain of images, two of which are empty layers, for push and pull testing. """
    base_layer = layer_bytes_for_contents('parent contents')
    empty_layer = layer_bytes_for_contents('', empty=True)
    leaf_layer = layer_bytes_for_contents('some contents')
    middle_layer = layer_bytes_for_contents('middle')

    # Listed from the base layer down to the leaf; each image's parent is the
    # entry directly before it.
    chain = [
        ('parentid', base_layer, False),
        ('emptyid', empty_layer, True),
        ('middleid', middle_layer, False),
        ('emptyid2', empty_layer, True),
        ('someid', leaf_layer, False),
    ]

    images = []
    previous_id = None
    for image_id, layer_bytes, is_empty in chain:
        images.append(Image(id=image_id, bytes=layer_bytes, parent_id=previous_id,
                            is_empty=is_empty))
        previous_id = image_id

    return images
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def jwk():
|
||||
return RSAKey(key=RSA.generate(2048))
|
||||
|
@ -161,10 +177,10 @@ def legacy_pusher(request, data_model, jwk):
|
|||
|
||||
@pytest.fixture(params=['v1', 'v2_1', 'v2_2'])
|
||||
def puller(request, data_model, jwk):
|
||||
if request == 'v1':
|
||||
if request.param == 'v1':
|
||||
return V1Protocol(jwk)
|
||||
|
||||
if request == 'v2_2' and data_model == 'oci_model':
|
||||
if request.param == 'v2_2' and data_model == 'oci_model':
|
||||
return V2Protocol(jwk, schema2=True)
|
||||
|
||||
return V2Protocol(jwk)
|
||||
|
|
|
@ -249,7 +249,21 @@ class V2Protocol(RegistryProtocol):
|
|||
if options.manifest_invalid_blob_references:
|
||||
checksum = 'sha256:' + hashlib.sha256('notarealthing').hexdigest()
|
||||
|
||||
builder.add_layer(checksum, len(image.bytes), urls=image.urls)
|
||||
if not image.is_empty:
|
||||
builder.add_layer(checksum, len(image.bytes), urls=image.urls)
|
||||
|
||||
def history_for_image(image):
    """ Builds the config history entry for the given image.

        The entry carries a fixed creation timestamp and a `created_by` command
        derived from the image's entrypoint (when its config defines one) or its
        ID. Empty images are additionally flagged with `empty_layer`.
    """
    entrypoint = image.config.get('Entrypoint') if image.config else None
    if entrypoint:
        created_by = '/bin/sh -c #(nop) ENTRYPOINT %s' % image.config['Entrypoint']
    else:
        created_by = '/bin/sh -c #(nop) %s' % image.id

    entry = {'created': '2018-04-03T18:37:09.284840891Z', 'created_by': created_by}
    if image.is_empty:
        entry['empty_layer'] = True

    return entry
|
||||
|
||||
config = {
|
||||
"os": "linux",
|
||||
|
@ -257,12 +271,7 @@ class V2Protocol(RegistryProtocol):
|
|||
"type": "layers",
|
||||
"diff_ids": []
|
||||
},
|
||||
"history": [{
|
||||
'created': '2018-04-03T18:37:09.284840891Z',
|
||||
'created_by': (('/bin/sh -c #(nop) ENTRYPOINT %s' % image.config['Entrypoint'])
|
||||
if image.config and image.config.get('Entrypoint')
|
||||
else '/bin/sh -c #(nop) %s' % image.id),
|
||||
} for image in images],
|
||||
"history": [history_for_image(image) for image in images],
|
||||
}
|
||||
|
||||
if images[-1].config:
|
||||
|
@ -535,17 +544,28 @@ class V2Protocol(RegistryProtocol):
|
|||
image_ids[tag_name] = manifest.leaf_layer_v1_image_id
|
||||
|
||||
# Verify the layers.
|
||||
for index, layer in enumerate(manifest.layers):
|
||||
layer_index = 0
|
||||
empty_count = 0
|
||||
for image in images:
|
||||
if manifest.schema_version == 2 and image.is_empty:
|
||||
empty_count += 1
|
||||
continue
|
||||
|
||||
# If the layer is remote, then we expect the blob to *not* exist in the system.
|
||||
expected_status = 404 if images[index].urls else 200
|
||||
layer = manifest.layers[layer_index]
|
||||
expected_status = 404 if image.urls else 200
|
||||
result = self.conduct(session, 'GET',
|
||||
'/v2/%s/blobs/%s' % (self.repo_name(namespace, repo_name),
|
||||
layer.digest),
|
||||
expected_status=expected_status,
|
||||
headers=headers)
|
||||
|
||||
|
||||
if expected_status == 200:
|
||||
assert result.content == images[index].bytes
|
||||
assert result.content == image.bytes
|
||||
|
||||
layer_index += 1
|
||||
|
||||
assert (len(manifest.layers) + empty_count) == len(images)
|
||||
|
||||
return PullResult(manifests=manifests, image_ids=image_ids)
|
||||
|
||||
|
|
|
@ -7,14 +7,20 @@ from cStringIO import StringIO
|
|||
from enum import Enum, unique
|
||||
from six import add_metaclass
|
||||
|
||||
Image = namedtuple('Image', ['id', 'parent_id', 'bytes', 'size', 'config', 'created', 'urls'])
|
||||
Image.__new__.__defaults__ = (None, None, None, None)
|
||||
from image.docker.schema2 import EMPTY_LAYER_BYTES
|
||||
|
||||
Image = namedtuple('Image', ['id', 'parent_id', 'bytes', 'size', 'config', 'created', 'urls',
|
||||
'is_empty'])
|
||||
Image.__new__.__defaults__ = (None, None, None, None, False)
|
||||
|
||||
PushResult = namedtuple('PushResult', ['manifests', 'headers'])
|
||||
PullResult = namedtuple('PullResult', ['manifests', 'image_ids'])
|
||||
|
||||
|
||||
def layer_bytes_for_contents(contents, mode='|gz', other_files=None):
|
||||
def layer_bytes_for_contents(contents, mode='|gz', other_files=None, empty=False):
|
||||
if empty:
|
||||
return EMPTY_LAYER_BYTES
|
||||
|
||||
layer_data = StringIO()
|
||||
tar_file = tarfile.open(fileobj=layer_data, mode='w' + mode)
|
||||
|
||||
|
|
|
@ -39,6 +39,19 @@ def test_basic_push_pull(pusher, puller, basic_images, liveserver_session, app_r
|
|||
credentials=credentials)
|
||||
|
||||
|
||||
def test_empty_layer(pusher, puller, images_with_empty_layer, liveserver_session, app_reloader):
    """ Test: An image with empty layers can be pushed to a new repository and pulled back. """
    auth = ('devtable', 'password')
    target = (liveserver_session, 'devtable', 'newrepo', 'latest', images_with_empty_layer)

    # Push first to create the repository, then pull the same tag to verify it.
    pusher.push(*target, credentials=auth)
    puller.pull(*target, credentials=auth)
|
||||
|
||||
|
||||
def test_multi_layer_images_push_pull(pusher, puller, multi_layer_images, liveserver_session,
|
||||
app_reloader):
|
||||
""" Test: Basic push and pull of a multi-layered image to a new repository. """
|
||||
|
|
Reference in a new issue