diff --git a/data/model/v2.py b/data/model/v2.py
index 52a4d1e3d..197ebebe0 100644
--- a/data/model/v2.py
+++ b/data/model/v2.py
@@ -1,11 +1,5 @@
-from data.types import (
-  Blob,
-  BlobUpload,
-  DockerV1Metadata,
-  ManifestJSON,
-  Repository,
-  Tag,
-)
+from image import Blob, BlobUpload, ManifestJSON, Repository, Tag
+from image.docker.v1 import DockerV1Metadata
 
 def create_repository(namespace_name, repo_name, user):
   model.repository.create_repository(namespace, reponame, user)
@@ -75,14 +69,13 @@ def docker_v1_metadata_by_tag(namespace_name, repo_name, tag_name):
 
 def docker_v1_metadata_by_image_id(namespace_name, repo_name, image_ids):
   images_query = model.image.lookup_repository_images(repo, all_image_ids)
-  return [DockerV1Metadata(
-    namespace_name=namespace_name,
-    repo_name=repo_name,
-    image_id=image.docker_image_id,
-    checksum=image.v1_checksum,
-    content_checksum=image.content_checksum,
-    compat_json=image.v1_json_metadata,
-  ) for image in images_query]
+  return {image.docker_image_id: DockerV1Metadata(namespace_name=namespace_name,
+                                                  repo_name=repo_name,
+                                                  image_id=image.docker_image_id,
+                                                  checksum=image.v1_checksum,
+                                                  content_checksum=image.content_checksum,
+                                                  compat_json=image.v1_json_metadata)
+          for image in images_query}
 
 
 def get_parents_docker_v1_metadata(namespace_name, repo_name, image_id):
diff --git a/endpoints/v2/__init__.py b/endpoints/v2/__init__.py
index 6ac99ecdf..97ecc40e6 100644
--- a/endpoints/v2/__init__.py
+++ b/endpoints/v2/__init__.py
@@ -54,7 +54,7 @@ def paginate(limit_kwarg_name='limit', offset_kwarg_name='offset',
   def callback(num_results, response):
     if num_results <= limit:
       return
-    next_page_token = encrypt_page_token({'offset': limit+offset})
+    next_page_token = encrypt_page_token({'offset': limit + offset})
     link = get_app_url() + url_for(request.endpoint, **request.view_args)
     link += '?%s; rel="next"' % urlencode({'n': limit, 'next_page': next_page_token})
     response.headers['Link'] = link
diff --git a/endpoints/v2/blob.py b/endpoints/v2/blob.py
index a5836bb61..bd2dc2a5b 100644
--- a/endpoints/v2/blob.py
+++ b/endpoints/v2/blob.py
@@ -216,7 +216,7 @@ def monolithic_upload_or_last_chunk(namespace_name, repo_name, upload_uuid):
   # Ensure the digest is present before proceeding.
   digest = request.args.get('digest', None)
   if digest is None:
-    raise BlobUploadInvalid()
+    raise BlobUploadInvalid(detail={'reason': 'Missing digest arg on monolithic upload'})
 
   # Find the upload.
   blob_upload = v2.blob_upload_by_uuid(namespace_name, repo_name, upload_uuid)
@@ -271,6 +271,9 @@ def delete_digest(namespace_name, repo_name, upload_uuid):
 
 
 def _render_range(num_uploaded_bytes, with_bytes_prefix=True):
+  """
+  Returns a string formatted to be used in the Range header.
+  """
   return '{0}0-{1}'.format('bytes=' if with_bytes_prefix else '', num_uploaded_bytes - 1)
 
 
@@ -327,6 +330,7 @@ def _start_offset_and_length(headers):
     start_offset, length = _parse_range_header(range_header)
   except _InvalidRangeHeader:
     return None, None
+
   return start_offset, length
 
 
@@ -339,6 +343,7 @@ def _upload_chunk(blob_upload, start_offset, length):
   # Check for invalidate arguments.
   if None in {blob_upload, start_offset, length}:
     return None
+
   if start_offset > 0 and start_offset > blob_upload.byte_count:
     return None
@@ -425,7 +430,7 @@ def _validate_digest(blob_upload, expected_digest):
   computed_digest = digest_tools.sha256_digest_from_hashlib(blob_upload.sha_state)
   if not digest_tools.digests_equal(computed_digest, expected_digest):
     logger.error('Digest mismatch for upload %s: Expected digest %s, found digest %s',
-                 upload_obj.uuid, expected_digest, computed_digest)
+                 blob_upload.uuid, expected_digest, computed_digest)
     raise BlobUploadInvalid(detail={'reason': 'Digest mismatch on uploaded blob'})
diff --git a/endpoints/v2/manifest.py b/endpoints/v2/manifest.py
index cb92b1ebe..a5adfac89 100644
--- a/endpoints/v2/manifest.py
+++ b/endpoints/v2/manifest.py
@@ -9,13 +9,6 @@ import features
 from app import docker_v2_signing_key, app, metric_queue
 from auth.registry_jwt_auth import process_registry_jwt_auth
 from data import model
-from data.types import (
-  DockerSchema1Manifest,
-  DockerSchema1ManifestBuilder,
-  ManifestException,
-  DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE,
-  DOCKER_SCHEMA2_CONTENT_TYPES,
-)
 from digest import digest_tools
 from endpoints.common import parse_repository_name
 from endpoints.decorators import anon_protect
@@ -24,6 +17,9 @@ from endpoints.v2.errors import (BlobUnknown, ManifestInvalid, ManifestUnknown,
                                  NameInvalid)
 from endpoints.trackhelper import track_and_log
 from endpoints.notificationhelper import spawn_notification
+from image.docker import ManifestException
+from image.docker.schema1 import DockerSchema1Manifest, DockerSchema1ManifestBuilder
+from image.docker.schema2 import DOCKER_SCHEMA2_CONTENT_TYPES
 from util.registry.replication import queue_storage_replication
 from util.names import VALID_TAG_PATTERN
@@ -56,7 +52,7 @@ def fetch_manifest_by_tagname(namespace_name, repo_name, tag_name):
   metric_queue.repository_pull.Inc(labelvalues=[namespace_name, repo_name, 'v2'])
 
   response = make_response(manifest.bytes, 200)
-  response.headers['Content-Type'] = DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE
+  response.headers['Content-Type'] = manifest.content_type
   response.headers['Docker-Content-Digest'] = manifest.digest
   return response
@@ -78,7 +74,7 @@ def fetch_manifest_by_digest(namespace_name, repo_name, manifest_ref):
   metric_queue.repository_pull.Inc(labelvalues=[namespace_name, repo_name, 'v2'])
 
   response = make_response(manifest.json, 200)
-  response.headers['Content-Type'] = DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE
+  response.headers['Content-Type'] = manifest.content_type
   response.headers['Docker-Content-Digest'] = manifest.digest
   return response
@@ -151,16 +147,15 @@ def _write_manifest(namespace_name, repo_name, manifest):
   # Ensure all the blobs in the manifest exist.
   storage_query = model.storage.lookup_repo_storages_by_content_checksum(repo, manifest.checksums)
   storage_map = {storage.content_checksum: storage for storage in storage_query}
-  for extracted_layer_metadata in manifest.layers:
-    digest_str = str(extracted_layer_metadata.digest)
+  for layer in manifest.layers:
+    digest_str = str(layer.digest)
     if digest_str not in storage_map:
       raise BlobUnknown(detail={'digest': digest_str})
 
   # Lookup all the images and their parent images (if any) inside the manifest.
   # This will let us know which v1 images we need to synthesize and which ones are invalid.
   all_image_ids = list(manifest.docker_image_ids | manifest.parent_image_ids)
-  images = v2.docker_v1_metadata_by_image_id(namespace_name, repo_name, all_image_ids)
-  images_map = {image.image_id: image for image in images}
+  images_map = v2.docker_v1_metadata_by_image_id(namespace_name, repo_name, all_image_ids)
 
   # Rewrite any v1 image IDs that do not match the checksum in the database.
   try:
@@ -181,14 +176,14 @@ def _write_manifest(namespace_name, repo_name, manifest):
     raise ManifestInvalid(detail={'message': me.message})
 
   # Store the manifest pointing to the tag.
-  leaf_layer_id = images_map[manifest.layers[-1].v1_metadata.image_id].image_id
+  leaf_layer_id = images_map[manifest.leaf_layer.v1_metadata.image_id].image_id
   v2.save_manifest(namespace_name, repo_name, tag_name, leaf_layer_id, manifest.digest,
                    manifest.bytes)
 
   # Queue all blob manifests for replication.
   # TODO(jschorr): Find a way to optimize this insertion.
   if features.STORAGE_REPLICATION:
-    for extracted_v1_metadata in manifest.layers:
-      digest_str = str(extracted_v1_metadata.digest)
+    for layer in manifest.layers:
+      digest_str = str(layer.digest)
       queue_storage_replication(namespace_name, storage_map[digest_str])
 
   track_and_log('push_repo', repo, tag=manifest.tag)
diff --git a/endpoints/verbs.py b/endpoints/verbs.py
index eff2ca35c..933fc9b0a 100644
--- a/endpoints/verbs.py
+++ b/endpoints/verbs.py
@@ -11,18 +11,18 @@ from auth.auth import process_auth
 from auth.auth_context import get_authenticated_user
 from auth.permissions import ReadRepositoryPermission
 from data import model, database
-from endpoints.trackhelper import track_and_log
+from endpoints.common import route_show_if, parse_repository_name
 from endpoints.decorators import anon_protect
+from endpoints.trackhelper import track_and_log
+from endpoints.v2.blob import BLOB_DIGEST_ROUTE
+from image.appc import AppCImageFormatter
+from image.docker.squashed import SquashedDockerImageFormatter
+from storage import Storage
+from util.registry.filelike import wrap_with_handler
 from util.registry.queuefile import QueueFile
 from util.registry.queueprocess import QueueProcess
 from util.registry.torrent import (make_torrent, per_user_torrent_filename, public_torrent_filename,
                                    PieceHasher)
-from util.registry.filelike import wrap_with_handler
-from formats.squashed import SquashedDockerImage
-from formats.aci import ACIImage
-from storage import Storage
-from endpoints.v2.blob import BLOB_DIGEST_ROUTE
-from endpoints.common import route_show_if, parse_repository_name
 
 verbs = Blueprint('verbs', __name__)
@@ -372,7 +372,7 @@ def get_aci_signature(server, namespace, repository, tag, os, arch):
 @verbs.route('/aci/<server>/<namespace>/<repository>/<tag>/aci/<os>/<arch>/', methods=['GET', 'HEAD'])
 @process_auth
 def get_aci_image(server, namespace, repository, tag, os, arch):
-  return _repo_verb(namespace, repository, tag, 'aci', ACIImage(),
+  return _repo_verb(namespace, repository, tag, 'aci', AppCImageFormatter(),
                     sign=True, checker=os_arch_checker(os, arch), os=os, arch=arch)
 
 
@@ -380,7 +380,7 @@ def get_aci_image(server, namespace, repository, tag, os, arch):
 @verbs.route('/squash/<namespace>/<repository>/<tag>', methods=['GET'])
 @process_auth
 def get_squashed_tag(namespace, repository, tag):
-  return _repo_verb(namespace, repository, tag, 'squash', SquashedDockerImage())
+  return _repo_verb(namespace, repository, tag, 'squash', SquashedDockerImageFormatter())
 
 
 @route_show_if(features.BITTORRENT)
diff --git a/formats/__init__.py b/formats/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/formats/tarimageformatter.py b/formats/tarimageformatter.py
deleted file mode 100644
index 2274af85e..000000000
--- a/formats/tarimageformatter.py
+++ /dev/null
@@ -1,56 +0,0 @@
-import tarfile
-from util.registry.gzipwrap import GzipWrap
-
-class TarImageFormatter(object):
-  """ Base class for classes which produce a TAR containing image and layer data. """
-
-  def build_stream(self, namespace, repository, tag, synthetic_image_id, layer_json,
-                   get_image_iterator, get_layer_iterator, get_image_json):
-    """ Builds and streams a synthetic .tar.gz that represents the formatted TAR created by this
-        class's implementation.
-    """
-    return GzipWrap(self.stream_generator(namespace, repository, tag,
-                                          synthetic_image_id, layer_json,
-                                          get_image_iterator, get_layer_iterator,
-                                          get_image_json))
-
-  def stream_generator(self, namespace, repository, tag, synthetic_image_id,
-                       layer_json, get_image_iterator, get_layer_iterator, get_image_json):
-    raise NotImplementedError
-
-  def tar_file(self, name, contents, mtime=None):
-    """ Returns the TAR binary representation for a file with the given name and file contents. """
-    length = len(contents)
-    tar_data = self.tar_file_header(name, length, mtime=mtime)
-    tar_data += contents
-    tar_data += self.tar_file_padding(length)
-    return tar_data
-
-  def tar_file_padding(self, length):
-    """ Returns TAR file padding for file data of the given length. """
-    if length % 512 != 0:
-      return '\0' * (512 - (length % 512))
-
-    return ''
-
-  def tar_file_header(self, name, file_size, mtime=None):
-    """ Returns TAR file header data for a file with the given name and size. """
-    info = tarfile.TarInfo(name=name)
-    info.type = tarfile.REGTYPE
-    info.size = file_size
-
-    if mtime is not None:
-      info.mtime = mtime
-    return info.tobuf()
-
-  def tar_folder(self, name, mtime=None):
-    """ Returns TAR file header data for a folder with the given name. """
-    info = tarfile.TarInfo(name=name)
-    info.type = tarfile.DIRTYPE
-
-    if mtime is not None:
-      info.mtime = mtime
-
-    # allow the directory to be readable by non-root users
-    info.mode = 0755
-    return info.tobuf()
diff --git a/image/__init__.py b/image/__init__.py
new file mode 100644
index 000000000..e09f7ae72
--- /dev/null
+++ b/image/__init__.py
@@ -0,0 +1,103 @@
+import tarfile
+
+from collections import namedtuple
+
+from util.registry.gzipwrap import GzipWrap
+
+
+class ManifestJSON(namedtuple('ManifestJSON', ['digest', 'json', 'media_type'])):
+  """
+  ManifestJSON represents a Manifest of any format.
+  """
+
+
+class Repository(namedtuple('Repository', ['id', 'name', 'namespace_name'])):
+  """
+  Repository represents a collection of tags.
+  """
+
+
+class Tag(namedtuple('Tag', ['name', 'repository'])):
+  """
+  Tag represents a user-facing alias for referencing a set of Manifests.
+  """
+
+
+class BlobUpload(namedtuple('BlobUpload', ['uuid', 'byte_count', 'uncompressed_byte_count',
+                                           'chunk_count', 'sha_state', 'location_name',
+                                           'storage_metadata', 'piece_sha_state', 'piece_hashes'])):
+  """
+  BlobUpload represents the current state of a Blob being uploaded.
+  """
+
+
+class Blob(namedtuple('Blob', ['digest', 'size', 'locations'])):
+  """
+  Blob represents an opaque binary blob saved to the storage system.
+  """
+
+
+class TarImageFormatter(object):
+  """
+  Base class for classes which produce a tar containing image and layer data.
+ """ + + def build_stream(self, namespace, repository, tag, synthetic_image_id, layer_json, + get_image_iterator, get_layer_iterator, get_image_json): + """ + Builds and streams a synthetic .tar.gz that represents the formatted tar created by this class's + implementation. + """ + return GzipWrap(self.stream_generator(namespace, repository, tag, + synthetic_image_id, layer_json, + get_image_iterator, get_layer_iterator, + get_image_json)) + + def stream_generator(self, namespace, repository, tag, synthetic_image_id, + layer_json, get_image_iterator, get_layer_iterator, get_image_json): + raise NotImplementedError + + def tar_file(self, name, contents, mtime=None): + """ + Returns the tar binary representation for a file with the given name and file contents. + """ + length = len(contents) + tar_data = self.tar_file_header(name, length, mtime=mtime) + tar_data += contents + tar_data += self.tar_file_padding(length) + return tar_data + + def tar_file_padding(self, length): + """ + Returns tar file padding for file data of the given length. + """ + if length % 512 != 0: + return '\0' * (512 - (length % 512)) + + return '' + + def tar_file_header(self, name, file_size, mtime=None): + """ + Returns tar file header data for a file with the given name and size. + """ + info = tarfile.TarInfo(name=name) + info.type = tarfile.REGTYPE + info.size = file_size + + if mtime is not None: + info.mtime = mtime + return info.tobuf() + + def tar_folder(self, name, mtime=None): + """ + Returns tar file header data for a folder with the given name. + """ + info = tarfile.TarInfo(name=name) + info.type = tarfile.DIRTYPE + + if mtime is not None: + info.mtime = mtime + + # allow the directory to be readable by non-root users + info.mode = 0755 + return info.tobuf() diff --git a/formats/aci.py b/image/appc/__init__.py similarity index 86% rename from formats/aci.py rename to image/appc/__init__.py index c24f691bd..592825e43 100644 --- a/formats/aci.py +++ b/image/appc/__init__.py @@ -6,14 +6,15 @@ from uuid import uuid4 from app import app from util.registry.streamlayerformat import StreamLayerMerger -from formats.tarimageformatter import TarImageFormatter +from image import TarImageFormatter ACNAME_REGEX = re.compile(r'[^a-z-]+') -class ACIImage(TarImageFormatter): - """ Image formatter which produces an ACI-compatible TAR. +class AppCImageFormatter(TarImageFormatter): + """ + Image formatter which produces an tarball according to the AppC specification. """ def stream_generator(self, namespace, repository, tag, synthetic_image_id, @@ -40,7 +41,9 @@ class ACIImage(TarImageFormatter): @staticmethod def _build_isolators(docker_config): - """ Builds ACI isolator config from the docker config. """ + """ + Builds ACI isolator config from the docker config. + """ def _isolate_memory(memory): return { @@ -107,22 +110,24 @@ class ACIImage(TarImageFormatter): @staticmethod def _build_ports(docker_config): - """ Builds the ports definitions for the ACI. """ + """ + Builds the ports definitions for the ACI. 
+
+    Formats:
+      port/tcp
+      port/udp
+      port
+    """
     ports = []
 
-    for docker_port_definition in ACIImage._get_docker_config_value(docker_config, 'Ports', []):
-      # Formats:
-      # port/tcp
-      # port/udp
-      # port
-
+    for docker_port in AppCImageFormatter._get_docker_config_value(docker_config, 'Ports', []):
       protocol = 'tcp'
       port_number = -1
 
-      if '/' in docker_port_definition:
-        (port_number, protocol) = docker_port_definition.split('/')
+      if '/' in docker_port:
+        (port_number, protocol) = docker_port.split('/')
       else:
-        port_number = docker_port_definition
+        port_number = docker_port
 
       try:
         port_number = int(port_number)
@@ -149,9 +154,9 @@ class ACIImage(TarImageFormatter):
     volumes = []
 
     def get_name(docker_volume_path):
-      return "volume-%s" % ACIImage._ac_name(docker_volume_path)
+      return "volume-%s" % AppCImageFormatter._ac_name(docker_volume_path)
 
-    for docker_volume_path in ACIImage._get_docker_config_value(docker_config, 'Volumes', []):
+    for docker_volume_path in AppCImageFormatter._get_docker_config_value(docker_config, 'Volumes', []):
       if not docker_volume_path:
        continue
 
@@ -219,9 +224,9 @@ class ACIImage(TarImageFormatter):
       "eventHandlers": [],
       "workingDirectory": config.get('WorkingDir', '') or '/',
       "environment": [{"name": key, "value": value} for (key, value) in env_vars],
-      "isolators": ACIImage._build_isolators(config),
-      "mountPoints": ACIImage._build_volumes(config),
-      "ports": ACIImage._build_ports(config),
+      "isolators": AppCImageFormatter._build_isolators(config),
+      "mountPoints": AppCImageFormatter._build_volumes(config),
+      "ports": AppCImageFormatter._build_ports(config),
       "annotations": [
         {"name": "created", "value": docker_layer_data.get('created', '')},
         {"name": "homepage", "value": source_url},
diff --git a/image/docker/__init__.py b/image/docker/__init__.py
new file mode 100644
index 000000000..74ceba2d7
--- /dev/null
+++ b/image/docker/__init__.py
@@ -0,0 +1,10 @@
+"""
+docker implements pure data transformations according to the many Docker specifications.
+"""
+
+class DockerException(Exception):
+  pass
+
+
+class ManifestException(DockerException):
+  pass
diff --git a/data/types.py b/image/docker/schema1.py
similarity index 55%
rename from data/types.py
rename to image/docker/schema1.py
index e93c06539..f154b7d21 100644
--- a/data/types.py
+++ b/image/docker/schema1.py
@@ -1,5 +1,11 @@
-import json
+"""
+schema1 implements pure data transformations according to the Docker Manifest v2.1 Specification.
+
+https://github.com/docker/distribution/blob/master/docs/spec/manifest-v2-1.md
+"""
+
 import hashlib
+import json
 import logging
 
 from collections import namedtuple, OrderedDict
@@ -9,72 +15,72 @@ from jwkest.jws import SIGNER_ALGS, keyrep
 from jwt.utils import base64url_encode, base64url_decode
 
 from digest import digest_tools
+from image.docker import ManifestException
+from image.docker.v1 import DockerV1Metadata
 
 
 logger = logging.getLogger(__name__)
 
-DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v1+prettyjws'
-DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v2+json'
-DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.list.v2+json'
+# Content Types
+DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v1+json'
+DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v1+prettyjws'
+DOCKER_SCHEMA1_CONTENT_TYPES = [DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE,
+                                DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE]
 
-DOCKER_SCHEMA2_CONTENT_TYPES = [DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE,
-                                DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE]
+# Keys for signature-related data
+DOCKER_SCHEMA1_SIGNATURES_KEY = 'signatures'
+DOCKER_SCHEMA1_HEADER_KEY = 'header'
+DOCKER_SCHEMA1_SIGNATURE_KEY = 'signature'
+DOCKER_SCHEMA1_PROTECTED_KEY = 'protected'
+DOCKER_SCHEMA1_FORMAT_LENGTH_KEY = 'formatLength'
+DOCKER_SCHEMA1_FORMAT_TAIL_KEY = 'formatTail'
 
+# Keys for manifest-related data
+DOCKER_SCHEMA1_REPO_NAME_KEY = 'name'
+DOCKER_SCHEMA1_REPO_TAG_KEY = 'tag'
+DOCKER_SCHEMA1_ARCH_KEY = 'architecture'
+DOCKER_SCHEMA1_FS_LAYERS_KEY = 'fsLayers'
+DOCKER_SCHEMA1_BLOB_SUM_KEY = 'blobSum'
+DOCKER_SCHEMA1_HISTORY_KEY = 'history'
+DOCKER_SCHEMA1_V1_COMPAT_KEY = 'v1Compatibility'
+DOCKER_SCHEMA1_SCHEMA_VER_KEY = 'schemaVersion'
 
-# These are used to extract backwards compatiblity data from Docker Manifest Schema 1
-ExtractedLayerMetadata = namedtuple(
-  'ExtractedLayerMetadata',
-  ['digest', 'v1_metadata', 'v1_metadata_str']
-)
-ExtractedDockerV1Metadata = namedtuple(
-  'ExtractedDockerV1Metadata',
-  ['image_id', 'parent_image_id', 'created', 'comment', 'command']
-)
-
-
-# Constants used for Docker Manifest Schema 2.1
-_DOCKER_SCHEMA_1_SIGNATURES_KEY = 'signatures'
-_DOCKER_SCHEMA_1_PROTECTED_KEY = 'protected'
-_DOCKER_SCHEMA_1_FORMAT_LENGTH_KEY = 'formatLength'
-_DOCKER_SCHEMA_1_FORMAT_TAIL_KEY = 'formatTail'
-_DOCKER_SCHEMA_1_REPO_NAME_KEY = 'name'
-_DOCKER_SCHEMA_1_REPO_TAG_KEY = 'tag'
-_DOCKER_SCHEMA_1_FS_LAYERS_KEY = 'fsLayers'
-_DOCKER_SCHEMA_1_HISTORY_KEY = 'history'
-_DOCKER_SCHEMA_1_BLOB_SUM_KEY = 'blobSum'
-_DOCKER_SCHEMA_1_V1_COMPAT_KEY = 'v1Compatibility'
-_DOCKER_SCHEMA_1_ARCH_KEY = 'architecture'
-_DOCKER_SCHEMA_1_SCHEMA_VER_KEY = 'schemaVersion'
 
+# Format for time used in the protected payload.
 _ISO_DATETIME_FORMAT_ZULU = '%Y-%m-%dT%H:%M:%SZ'
-_JWS_ALGORITHM = 'RS256'
+
+# The algorithm we use to sign the JWS.
+_JWS_SIGNING_ALGORITHM = 'RS256'
 
 
-class ManifestException(Exception):
+class MalformedSchema1Manifest(ManifestException):
+  """
+  Raised when a manifest fails an assertion that should be true according to the Docker Manifest
+  v2.1 Specification.
+  """
   pass
 
 
-class ManifestMalformed(ManifestException):
+class InvalidSchema1Signature(ManifestException):
+  """
+  Raised when there is a failure verifying the signature of a signed Docker 2.1 Manifest.
+ """ pass -class ManifestSignatureFailure(ManifestException): - pass +class Schema1Layer(namedtuple('Schema1Layer', ['digest', 'v1_metadata', 'raw_v1_metadata'])): + """ + Represents all of the data about an individual layer in a given Manifest. + This is the union of the fsLayers (digest) and the history entries (v1_compatibility). + """ -def _updated_v1_metadata(v1_metadata_json, updated_id_map): - parsed = json.loads(v1_metadata_json) - parsed['id'] = updated_id_map[parsed['id']] - - if parsed.get('parent') and parsed['parent'] in updated_id_map: - parsed['parent'] = updated_id_map[parsed['parent']] - - if parsed.get('container_config', {}).get('Image'): - existing_image = parsed['container_config']['Image'] - if existing_image in updated_id_map: - parsed['container_config']['image'] = updated_id_map[existing_image] - - return json.dumps(parsed) +class Schema1V1Metadata(namedtuple('Schema1V1Metadata', ['image_id', 'parent_image_id', 'created', + 'comment', 'command'])): + """ + Represents the necessary data extracted from the v1 compatibility string in a given layer of a + Manifest. + """ class DockerSchema1Manifest(object): @@ -83,17 +89,18 @@ class DockerSchema1Manifest(object): self._bytes = manifest_bytes self._parsed = json.loads(manifest_bytes) - self._signatures = self._parsed[_DOCKER_SCHEMA_1_SIGNATURES_KEY] - self._tag = self._parsed[_DOCKER_SCHEMA_1_REPO_TAG_KEY] + self._signatures = self._parsed[DOCKER_SCHEMA1_SIGNATURES_KEY] + self._tag = self._parsed[DOCKER_SCHEMA1_REPO_TAG_KEY] - repo_name_tuple = self._parsed[_DOCKER_SCHEMA_1_REPO_NAME_KEY].split('/') + repo_name = self._parsed[DOCKER_SCHEMA1_REPO_NAME_KEY] + repo_name_tuple = repo_name.split('/') if len(repo_name_tuple) > 1: self._namespace, self._repo_name = repo_name_tuple elif len(repo_name_tuple) == 1: self._namespace = '' self._repo_name = repo_name_tuple[0] else: - raise ManifestMalformed('malformed repository name') + raise MalformedSchema1Manifest('malformed repository name: %s' % repo_name) if validate: self._validate() @@ -108,7 +115,11 @@ class DockerSchema1Manifest(object): sig = base64url_decode(signature['signature'].encode('utf-8')) verified = signer.verify(bytes_to_verify, sig, gk) if not verified: - raise ManifestSignatureFailure() + raise InvalidSchema1Signature() + + @property + def content_type(self): + return DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE @property def signatures(self): @@ -151,6 +162,10 @@ class DockerSchema1Manifest(object): def checksums(self): return list({str(mdata.digest) for mdata in self.layers}) + @property + def leaf_layer(self): + return self.layers[-1] + @property def layers(self): if self._layers is None: @@ -158,38 +173,39 @@ class DockerSchema1Manifest(object): return self._layers def _generate_layers(self): - """ Returns a generator of objects that have the blobSum and v1Compatibility keys in them, - starting from the base image and working toward the leaf node. """ - for blob_sum_obj, history_obj in reversed(zip(self._parsed[_DOCKER_SCHEMA_1_FS_LAYERS_KEY], - self._parsed[_DOCKER_SCHEMA_1_HISTORY_KEY])): + Returns a generator of objects that have the blobSum and v1Compatibility keys in them, + starting from the base image and working toward the leaf node. 
+ """ + for blob_sum_obj, history_obj in reversed(zip(self._parsed[DOCKER_SCHEMA1_FS_LAYERS_KEY], + self._parsed[DOCKER_SCHEMA1_HISTORY_KEY])): try: - image_digest = digest_tools.Digest.parse_digest(blob_sum_obj[_DOCKER_SCHEMA_1_BLOB_SUM_KEY]) + image_digest = digest_tools.Digest.parse_digest(blob_sum_obj[DOCKER_SCHEMA1_BLOB_SUM_KEY]) except digest_tools.InvalidDigestException: - raise ManifestMalformed('could not parse manifest digest: %s' % - blob_sum_obj[_DOCKER_SCHEMA_1_BLOB_SUM_KEY]) + raise MalformedSchema1Manifest('could not parse manifest digest: %s' % + blob_sum_obj[DOCKER_SCHEMA1_BLOB_SUM_KEY]) - metadata_string = history_obj[_DOCKER_SCHEMA_1_V1_COMPAT_KEY] + metadata_string = history_obj[DOCKER_SCHEMA1_V1_COMPAT_KEY] v1_metadata = json.loads(metadata_string) command_list = v1_metadata.get('container_config', {}).get('Cmd', None) command = json.dumps(command_list) if command_list else None if not 'id' in v1_metadata: - raise ManifestMalformed('invalid manifest v1 history') + raise MalformedSchema1Manifest('id field missing from v1Compatibility JSON') - extracted = ExtractedDockerV1Metadata(v1_metadata['id'], v1_metadata.get('parent'), - v1_metadata.get('created'), v1_metadata.get('comment'), - command) - yield ExtractedLayerMetadata(image_digest, extracted, metadata_string) + extracted = Schema1V1Metadata(v1_metadata['id'], v1_metadata.get('parent'), + v1_metadata.get('created'), v1_metadata.get('comment'), + command) + yield Schema1Layer(image_digest, extracted, metadata_string) @property def payload(self): - protected = str(self._signatures[0][_DOCKER_SCHEMA_1_PROTECTED_KEY]) + protected = str(self._signatures[0][DOCKER_SCHEMA1_PROTECTED_KEY]) parsed_protected = json.loads(base64url_decode(protected)) - signed_content_head = self._bytes[:parsed_protected[_DOCKER_SCHEMA_1_FORMAT_LENGTH_KEY]] - signed_content_tail = base64url_decode(str(parsed_protected[_DOCKER_SCHEMA_1_FORMAT_TAIL_KEY])) + signed_content_head = self._bytes[:parsed_protected[DOCKER_SCHEMA1_FORMAT_LENGTH_KEY]] + signed_content_tail = base64url_decode(str(parsed_protected[DOCKER_SCHEMA1_FORMAT_TAIL_KEY])) return signed_content_head + signed_content_tail def rewrite_invalid_image_ids(self, images_map): @@ -205,15 +221,15 @@ class DockerSchema1Manifest(object): has_rewritten_ids = False updated_id_map = {} - for extracted_layer_metadata in self.layers: - digest_str = str(extracted_layer_metadata.digest) - extracted_v1_metadata = extracted_layer_metadata.v1_metadata + for layer in self.layers: + digest_str = str(layer.digest) + extracted_v1_metadata = layer.v1_metadata working_image_id = extracted_v1_metadata.image_id # Update our digest_history hash for the new layer data. digest_history.update(digest_str) digest_history.update("@") - digest_history.update(extracted_layer_metadata.v1_metadata_str.encode('utf-8')) + digest_history.update(layer.raw_v1_metadata.encode('utf-8')) digest_history.update("|") # Ensure that the v1 image's storage matches the V2 blob. If not, we've @@ -233,12 +249,11 @@ class DockerSchema1Manifest(object): if extracted_v1_metadata.parent_image_id is not None: parent_image_id = images_map.get(extracted_v1_metadata.parent_image_id, None) if parent_image_id is None: - raise ManifestMalformed( - 'Parent not found with image ID: {0}'.format(extracted_v1_metadata.parent_image_id) - ) + raise MalformedSchema1Manifest('parent not found with image ID: %s' % + extracted_v1_metadata.parent_image_id) # Synthesize and store the v1 metadata in the db. 
-      v1_metadata_json = extracted_layer_metadata.v1_metadata_str
+      v1_metadata_json = layer.raw_v1_metadata
       if has_rewritten_ids:
         v1_metadata_json = _updated_v1_metadata(v1_metadata_json, updated_id_map)
 
@@ -253,17 +268,19 @@ class DockerSchema1Manifest(object):
 
 
 class DockerSchema1ManifestBuilder(object):
-  """ Class which represents a manifest which is currently being built. """
+  """
+  A convenient abstraction around creating new DockerSchema1Manifests.
+  """
 
   def __init__(self, namespace_name, repo_name, tag, architecture='amd64'):
     repo_name_key = '{0}/{1}'.format(namespace_name, repo_name)
     if namespace_name == '':
       repo_name_key = repo_name
 
    self._base_payload = {
-      _DOCKER_SCHEMA_1_REPO_TAG_KEY: tag,
-      _DOCKER_SCHEMA_1_REPO_NAME_KEY: repo_name_key,
-      _DOCKER_SCHEMA_1_ARCH_KEY: architecture,
-      _DOCKER_SCHEMA_1_SCHEMA_VER_KEY: 1,
+      DOCKER_SCHEMA1_REPO_TAG_KEY: tag,
+      DOCKER_SCHEMA1_REPO_NAME_KEY: repo_name_key,
+      DOCKER_SCHEMA1_ARCH_KEY: architecture,
+      DOCKER_SCHEMA1_SCHEMA_VER_KEY: 1,
     }
 
     self._fs_layer_digests = []
@@ -271,21 +288,22 @@ class DockerSchema1ManifestBuilder(object):
 
   def add_layer(self, layer_digest, v1_json_metadata):
     self._fs_layer_digests.append({
-      _DOCKER_SCHEMA_1_BLOB_SUM_KEY: layer_digest,
+      DOCKER_SCHEMA1_BLOB_SUM_KEY: layer_digest,
     })
     self._history.append({
-      _DOCKER_SCHEMA_1_V1_COMPAT_KEY: v1_json_metadata,
+      DOCKER_SCHEMA1_V1_COMPAT_KEY: v1_json_metadata,
     })
     return self
 
   def build(self, json_web_key):
-    """ Build the payload and sign it, returning a SignedManifest object.
+    """
+    Builds a DockerSchema1Manifest object complete with signature.
     """
     payload = OrderedDict(self._base_payload)
     payload.update({
-      _DOCKER_SCHEMA_1_HISTORY_KEY: self._history,
-      _DOCKER_SCHEMA_1_FS_LAYERS_KEY: self._fs_layer_digests,
+      DOCKER_SCHEMA1_HISTORY_KEY: self._history,
+      DOCKER_SCHEMA1_FS_LAYERS_KEY: self._fs_layer_digests,
     })
 
     payload_str = json.dumps(payload, indent=3)
@@ -302,7 +320,7 @@ class DockerSchema1ManifestBuilder(object):
 
     bytes_to_sign = '{0}.{1}'.format(protected, base64url_encode(payload_str))
 
-    signer = SIGNER_ALGS[_JWS_ALGORITHM]
+    signer = SIGNER_ALGS[_JWS_SIGNING_ALGORITHM]
     signature = base64url_encode(signer.sign(bytes_to_sign, json_web_key.get_key()))
     logger.debug('Generated signature: %s', signature)
 
@@ -311,48 +329,31 @@ class DockerSchema1ManifestBuilder(object):
                   if comp in public_members}
 
     signature_block = {
-      'header': {
-        'jwk': public_key,
-        'alg': _JWS_ALGORITHM,
-      },
-      'signature': signature,
-      _DOCKER_SCHEMA_1_PROTECTED_KEY: protected,
+      DOCKER_SCHEMA1_HEADER_KEY: {'jwk': public_key, 'alg': _JWS_SIGNING_ALGORITHM},
+      DOCKER_SCHEMA1_SIGNATURE_KEY: signature,
+      DOCKER_SCHEMA1_PROTECTED_KEY: protected,
     }
 
     logger.debug('Encoded signature block: %s', json.dumps(signature_block))
 
-    payload.update({
-      _DOCKER_SCHEMA_1_SIGNATURES_KEY: [signature_block],
-    })
+    payload.update({DOCKER_SCHEMA1_SIGNATURES_KEY: [signature_block]})
 
     return DockerSchema1Manifest(json.dumps(payload, indent=3))
 
 
-Repository = namedtuple('Repository', ['id', 'name', 'namespace_name'])
+def _updated_v1_metadata(v1_metadata_json, updated_id_map):
+  """
+  Updates v1_metadata with new image IDs.
+ """ + parsed = json.loads(v1_metadata_json) + parsed['id'] = updated_id_map[parsed['id']] -Tag = namedtuple('Tag', ['name', 'repository']) + if parsed.get('parent') and parsed['parent'] in updated_id_map: + parsed['parent'] = updated_id_map[parsed['parent']] -ManifestJSON = namedtuple('ManifestJSON', ['digest', 'json']) + if parsed.get('container_config', {}).get('Image'): + existing_image = parsed['container_config']['Image'] + if existing_image in updated_id_map: + parsed['container_config']['image'] = updated_id_map[existing_image] -DockerV1Metadata = namedtuple('DockerV1Metadata', ['namespace_name', - 'repo_name', - 'image_id', - 'checksum', - 'content_checksum', - 'created', - 'comment', - 'command', - 'parent_image_id', - 'compat_json']) - -BlobUpload = namedtuple('BlobUpload', ['uuid', - 'byte_count', - 'uncompressed_byte_count', - 'chunk_count', - 'sha_state', - 'location_name', - 'storage_metadata', - 'piece_sha_state', - 'piece_hashes']) - -Blob = namedtuple('Blob', ['digest', 'size', 'locations']) + return json.dumps(parsed) diff --git a/image/docker/schema2.py b/image/docker/schema2.py new file mode 100644 index 000000000..4a69ab8bb --- /dev/null +++ b/image/docker/schema2.py @@ -0,0 +1,11 @@ +""" +schema2 implements pure data transformations according to the Docker Manifest v2.2 Specification. + +https://github.com/docker/distribution/blob/master/docs/spec/manifest-v2-2.md +""" + +# Content Types +DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v2+json' +DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.list.v2+json' +DOCKER_SCHEMA2_CONTENT_TYPES = [DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE, + DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE] diff --git a/formats/squashed.py b/image/docker/squashed.py similarity index 85% rename from formats/squashed.py rename to image/docker/squashed.py index ba0964339..0f6628952 100644 --- a/formats/squashed.py +++ b/image/docker/squashed.py @@ -1,30 +1,31 @@ -from app import app -from util.registry.gzipwrap import GZIP_BUFFER_SIZE -from util.registry.streamlayerformat import StreamLayerMerger -from formats.tarimageformatter import TarImageFormatter - import copy import json import math import calendar +from app import app +from image import TarImageFormatter +from util.registry.gzipwrap import GZIP_BUFFER_SIZE +from util.registry.streamlayerformat import StreamLayerMerger + + class FileEstimationException(Exception): - """ Exception raised by build_docker_load_stream if the estimated size of the layer TAR - was lower than the actual size. This means the sent TAR header is wrong, and we have - to fail. + """ + Exception raised by build_docker_load_stream if the estimated size of the layer tar was lower + than the actual size. This means the sent tar header is wrong, and we have to fail. """ pass -class SquashedDockerImage(TarImageFormatter): - """ Image formatter which produces a squashed image compatible with the `docker load` - command. +class SquashedDockerImageFormatter(TarImageFormatter): + """ + Image formatter which produces a squashed image compatible with the `docker load` command. """ - # Multiplier against the image size reported by Docker to account for the TAR metadata. + # Multiplier against the image size reported by Docker to account for the tar metadata. # Note: This multiplier was not formally calculated in anyway and should be adjusted overtime # if/when we encounter issues with it. 
-  # daemon dies when trying to load the entire TAR into memory.
+  # daemon dies when trying to load the entire tar into memory.
   SIZE_MULTIPLIER = 1.2
 
   def stream_generator(self, namespace, repository, tag, synthetic_image_id,
@@ -39,7 +40,7 @@
     # repositories - JSON file containing a repo -> tag -> image map
     # {image ID folder}:
     #   json - The layer JSON
-    #   layer.tar - The TARed contents of the layer
+    #   layer.tar - The tarred contents of the layer
     #   VERSION - The docker import version: '1.0'
     layer_merger = StreamLayerMerger(get_layer_iterator)
@@ -57,7 +58,7 @@
     yield self.tar_folder(synthetic_image_id, mtime=image_mtime)
 
     # Yield the JSON layer data.
-    layer_json = SquashedDockerImage._build_layer_json(layer_json, synthetic_image_id)
+    layer_json = SquashedDockerImageFormatter._build_layer_json(layer_json, synthetic_image_id)
     yield self.tar_file(synthetic_image_id + '/json', json.dumps(layer_json), mtime=image_mtime)
 
     # Yield the VERSION file.
@@ -73,7 +74,7 @@
         estimated_file_size += image.storage.uncompressed_size
       else:
         image_json = get_image_json(image)
-        estimated_file_size += image_json.get('Size', 0) * SquashedDockerImage.SIZE_MULTIPLIER
+        estimated_file_size += image_json.get('Size', 0) * SquashedDockerImageFormatter.SIZE_MULTIPLIER
 
     # Make sure the estimated file size is an integer number of bytes.
     estimated_file_size = int(math.ceil(estimated_file_size))
@@ -105,7 +106,7 @@
     # Yield any file padding to 512 bytes that is necessary.
     yield self.tar_file_padding(estimated_file_size)
 
-    # Last two records are empty in TAR spec.
+    # Last two records are empty in tar spec.
     yield '\0' * 512
     yield '\0' * 512
 
diff --git a/image/docker/v1.py b/image/docker/v1.py
new file mode 100644
index 000000000..b6df9f21a
--- /dev/null
+++ b/image/docker/v1.py
@@ -0,0 +1,16 @@
+"""
+v1 implements pure data transformations according to the Docker Image Specification v1.1.
+
+https://github.com/docker/docker/blob/master/image/spec/v1.1.md
+"""
+
+from collections import namedtuple
+
+class DockerV1Metadata(namedtuple('DockerV1Metadata',
+                                  ['namespace_name', 'repo_name', 'image_id', 'checksum',
+                                   'content_checksum', 'created', 'comment', 'command',
+                                   'parent_image_id', 'compat_json'])):
+  """
+  DockerV1Metadata represents all of the metadata for a given Docker v1 Image.
+  The original form of the metadata is stored in the compat_json field.
+  """
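
Reviewer sketch (not part of the diff): a minimal example of how the relocated schema1 API fits together after this change. The builder, add_layer, build, content_type, tag, and leaf_layer names all come from the code above; the RSAKey construction is an assumption based on jwkest/PyCrypto usage (Quay itself passes the configured docker_v2_signing_key into build()), and the image ID and digest values are dummies.

# Python 2, matching the codebase. Assumes jwkest and PyCrypto are installed.
import json

from Crypto.PublicKey import RSA
from jwkest.jwk import RSAKey

from image.docker.schema1 import (DockerSchema1ManifestBuilder,
                                  DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE)

# Hypothetical signing key for illustration only; production code uses the
# registry's configured signing key.
signing_key = RSAKey(key=RSA.generate(2048))

# One layer whose v1Compatibility blob carries the image ID and command, as
# _generate_layers() expects.
v1_compat = json.dumps({'id': 'deadbeef', 'container_config': {'Cmd': ['/bin/sh']}})

manifest = (DockerSchema1ManifestBuilder('devtable', 'simple', 'latest')
            .add_layer('sha256:' + 'a' * 64, v1_compat)
            .build(signing_key))

# The parsed manifest exposes the properties consumed by endpoints/v2/manifest.py.
assert manifest.content_type == DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE
assert manifest.tag == 'latest'
assert manifest.leaf_layer.v1_metadata.image_id == 'deadbeef'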