mv data/types image

This change also merges formats into the new image module.
Jimmy Zelinskie 2016-08-02 18:45:30 -04:00
parent a516c08deb
commit 32a6c22b43
14 changed files with 342 additions and 258 deletions
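In short: the flat data/types.py module and the formats/ package are replaced by the image package, split by format. A condensed before/after view of the import paths, collected from the hunks below (no single module imported all of these at once; this is only a summary for orientation):

# Before this commit:
from data.types import (Blob, BlobUpload, DockerV1Metadata, ManifestJSON, Repository, Tag,
                        DockerSchema1Manifest, DockerSchema1ManifestBuilder, ManifestException,
                        DOCKER_SCHEMA2_CONTENT_TYPES)
from formats.tarimageformatter import TarImageFormatter
from formats.aci import ACIImage
from formats.squashed import SquashedDockerImage

# After this commit:
from image import Blob, BlobUpload, ManifestJSON, Repository, Tag, TarImageFormatter
from image.appc import AppCImageFormatter                        # was formats.aci.ACIImage
from image.docker import ManifestException
from image.docker.schema1 import DockerSchema1Manifest, DockerSchema1ManifestBuilder
from image.docker.schema2 import DOCKER_SCHEMA2_CONTENT_TYPES
from image.docker.squashed import SquashedDockerImageFormatter  # was formats.squashed.SquashedDockerImage
from image.docker.v1 import DockerV1Metadata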


@@ -1,11 +1,5 @@
-from data.types import (
-Blob,
-BlobUpload,
-DockerV1Metadata,
-ManifestJSON,
-Repository,
-Tag,
-)
+from image import Blob, BlobUpload, ManifestJSON, Repository, Tag
+from image.docker.v1 import DockerV1Metadata
def create_repository(namespace_name, repo_name, user):
model.repository.create_repository(namespace, reponame, user)
@@ -75,14 +69,13 @@ def docker_v1_metadata_by_tag(namespace_name, repo_name, tag_name):
def docker_v1_metadata_by_image_id(namespace_name, repo_name, image_ids):
images_query = model.image.lookup_repository_images(repo, all_image_ids)
-return [DockerV1Metadata(
-namespace_name=namespace_name,
+return {image.docker_image_id: DockerV1Metadata(namespace_name=namespace_name,
repo_name=repo_name,
image_id=image.docker_image_id,
checksum=image.v1_checksum,
content_checksum=image.content_checksum,
-compat_json=image.v1_json_metadata,
-) for image in images_query]
+compat_json=image.v1_json_metadata)
+for image in images_query}
def get_parents_docker_v1_metadata(namespace_name, repo_name, image_id):


@@ -54,7 +54,7 @@ def paginate(limit_kwarg_name='limit', offset_kwarg_name='offset',
def callback(num_results, response):
if num_results <= limit:
return
-next_page_token = encrypt_page_token({'offset': limit+offset})
+next_page_token = encrypt_page_token({'offset': limit + offset})
link = get_app_url() + url_for(request.endpoint, **request.view_args)
link += '?%s; rel="next"' % urlencode({'n': limit, 'next_page': next_page_token})
response.headers['Link'] = link


@@ -216,7 +216,7 @@ def monolithic_upload_or_last_chunk(namespace_name, repo_name, upload_uuid):
# Ensure the digest is present before proceeding.
digest = request.args.get('digest', None)
if digest is None:
-raise BlobUploadInvalid()
+raise BlobUploadInvalid(detail={'reason': 'Missing digest arg on monolithic upload'})
# Find the upload.
blob_upload = v2.blob_upload_by_uuid(namespace_name, repo_name, upload_uuid)
@@ -271,6 +271,9 @@ def delete_digest(namespace_name, repo_name, upload_uuid):
def _render_range(num_uploaded_bytes, with_bytes_prefix=True):
+"""
+Returns a string formatted to be used in the Range header.
+"""
return '{0}0-{1}'.format('bytes=' if with_bytes_prefix else '', num_uploaded_bytes - 1)
@@ -327,6 +330,7 @@ def _start_offset_and_length(headers):
start_offset, length = _parse_range_header(range_header)
except _InvalidRangeHeader:
return None, None
+
return start_offset, length
@@ -339,6 +343,7 @@ def _upload_chunk(blob_upload, start_offset, length):
# Check for invalidate arguments.
if None in {blob_upload, start_offset, length}:
return None
+
if start_offset > 0 and start_offset > blob_upload.byte_count:
return None
@@ -425,7 +430,7 @@ def _validate_digest(blob_upload, expected_digest):
computed_digest = digest_tools.sha256_digest_from_hashlib(blob_upload.sha_state)
if not digest_tools.digests_equal(computed_digest, expected_digest):
logger.error('Digest mismatch for upload %s: Expected digest %s, found digest %s',
-upload_obj.uuid, expected_digest, computed_digest)
+blob_upload.uuid, expected_digest, computed_digest)
raise BlobUploadInvalid(detail={'reason': 'Digest mismatch on uploaded blob'})


@@ -9,13 +9,6 @@ import features
from app import docker_v2_signing_key, app, metric_queue
from auth.registry_jwt_auth import process_registry_jwt_auth
from data import model
-from data.types import (
-DockerSchema1Manifest,
-DockerSchema1ManifestBuilder,
-ManifestException,
-DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE,
-DOCKER_SCHEMA2_CONTENT_TYPES,
-)
from digest import digest_tools
from endpoints.common import parse_repository_name
from endpoints.decorators import anon_protect
@@ -24,6 +17,9 @@ from endpoints.v2.errors import (BlobUnknown, ManifestInvalid, ManifestUnknown,
NameInvalid)
from endpoints.trackhelper import track_and_log
from endpoints.notificationhelper import spawn_notification
+from image.docker import ManifestException
+from image.docker.schema1 import DockerSchema1Manifest, DockerSchema1ManifestBuilder
+from image.docker.schema2 import DOCKER_SCHEMA2_CONTENT_TYPES
from util.registry.replication import queue_storage_replication
from util.names import VALID_TAG_PATTERN
@@ -56,7 +52,7 @@ def fetch_manifest_by_tagname(namespace_name, repo_name, tag_name):
metric_queue.repository_pull.Inc(labelvalues=[namespace_name, repo_name, 'v2'])
response = make_response(manifest.bytes, 200)
-response.headers['Content-Type'] = DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE
+response.headers['Content-Type'] = manifest.content_type
response.headers['Docker-Content-Digest'] = manifest.digest
return response
@@ -78,7 +74,7 @@ def fetch_manifest_by_digest(namespace_name, repo_name, manifest_ref):
metric_queue.repository_pull.Inc(labelvalues=[namespace_name, repo_name, 'v2'])
response = make_response(manifest.json, 200)
-response.headers['Content-Type'] = DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE
+response.headers['Content-Type'] = manifest.content_type
response.headers['Docker-Content-Digest'] = manifest.digest
return response
@@ -151,16 +147,15 @@ def _write_manifest(namespace_name, repo_name, manifest):
# Ensure all the blobs in the manifest exist.
storage_query = model.storage.lookup_repo_storages_by_content_checksum(repo, manifest.checksums)
storage_map = {storage.content_checksum: storage for storage in storage_query}
-for extracted_layer_metadata in manifest.layers:
-digest_str = str(extracted_layer_metadata.digest)
+for layer in manifest.layers:
+digest_str = str(layer.digest)
if digest_str not in storage_map:
raise BlobUnknown(detail={'digest': digest_str})
# Lookup all the images and their parent images (if any) inside the manifest.
# This will let us know which v1 images we need to synthesize and which ones are invalid.
all_image_ids = list(manifest.docker_image_ids | manifest.parent_image_ids)
-images = v2.docker_v1_metadata_by_image_id(namespace_name, repo_name, all_image_ids)
-images_map = {image.image_id: image for image in images}
+images_map = v2.docker_v1_metadata_by_image_id(namespace_name, repo_name, all_image_ids)
# Rewrite any v1 image IDs that do not match the checksum in the database.
try:
@@ -181,14 +176,14 @@ def _write_manifest(namespace_name, repo_name, manifest):
raise ManifestInvalid(detail={'message': me.message})
# Store the manifest pointing to the tag.
-leaf_layer_id = images_map[manifest.layers[-1].v1_metadata.image_id].image_id
+leaf_layer_id = images_map[manifest.leaf_layer.v1_metadata.image_id].image_id
v2.save_manifest(namespace_name, repo_name, tag_name, leaf_layer_id, manifest.digest, manifest.bytes)
# Queue all blob manifests for replication.
# TODO(jschorr): Find a way to optimize this insertion.
if features.STORAGE_REPLICATION:
-for extracted_v1_metadata in manifest.layers:
-digest_str = str(extracted_v1_metadata.digest)
+for layer in manifest.layers:
+digest_str = str(layer.digest)
queue_storage_replication(namespace_name, storage_map[digest_str])
track_and_log('push_repo', repo, tag=manifest.tag)


@@ -11,18 +11,18 @@ from auth.auth import process_auth
from auth.auth_context import get_authenticated_user
from auth.permissions import ReadRepositoryPermission
from data import model, database
-from endpoints.trackhelper import track_and_log
+from endpoints.common import route_show_if, parse_repository_name
from endpoints.decorators import anon_protect
+from endpoints.trackhelper import track_and_log
+from endpoints.v2.blob import BLOB_DIGEST_ROUTE
+from image.appc import AppCImageFormatter
+from image.docker.squashed import SquashedDockerImageFormatter
+from storage import Storage
+from util.registry.filelike import wrap_with_handler
from util.registry.queuefile import QueueFile
from util.registry.queueprocess import QueueProcess
from util.registry.torrent import (make_torrent, per_user_torrent_filename, public_torrent_filename,
PieceHasher)
-from util.registry.filelike import wrap_with_handler
-from formats.squashed import SquashedDockerImage
-from formats.aci import ACIImage
-from storage import Storage
-from endpoints.v2.blob import BLOB_DIGEST_ROUTE
-from endpoints.common import route_show_if, parse_repository_name
verbs = Blueprint('verbs', __name__)
@@ -372,7 +372,7 @@ def get_aci_signature(server, namespace, repository, tag, os, arch):
@verbs.route('/aci/<server>/<namespace>/<repository>/<tag>/aci/<os>/<arch>/', methods=['GET', 'HEAD'])
@process_auth
def get_aci_image(server, namespace, repository, tag, os, arch):
-return _repo_verb(namespace, repository, tag, 'aci', ACIImage(),
+return _repo_verb(namespace, repository, tag, 'aci', AppCImageFormatter(),
sign=True, checker=os_arch_checker(os, arch), os=os, arch=arch)
@@ -380,7 +380,7 @@ def get_aci_image(server, namespace, repository, tag, os, arch):
@verbs.route('/squash/<namespace>/<repository>/<tag>', methods=['GET'])
@process_auth
def get_squashed_tag(namespace, repository, tag):
-return _repo_verb(namespace, repository, tag, 'squash', SquashedDockerImage())
+return _repo_verb(namespace, repository, tag, 'squash', SquashedDockerImageFormatter())
@route_show_if(features.BITTORRENT)


@@ -1,56 +0,0 @@
import tarfile
from util.registry.gzipwrap import GzipWrap
class TarImageFormatter(object):
""" Base class for classes which produce a TAR containing image and layer data. """
def build_stream(self, namespace, repository, tag, synthetic_image_id, layer_json,
get_image_iterator, get_layer_iterator, get_image_json):
""" Builds and streams a synthetic .tar.gz that represents the formatted TAR created by this
class's implementation.
"""
return GzipWrap(self.stream_generator(namespace, repository, tag,
synthetic_image_id, layer_json,
get_image_iterator, get_layer_iterator,
get_image_json))
def stream_generator(self, namespace, repository, tag, synthetic_image_id,
layer_json, get_image_iterator, get_layer_iterator, get_image_json):
raise NotImplementedError
def tar_file(self, name, contents, mtime=None):
""" Returns the TAR binary representation for a file with the given name and file contents. """
length = len(contents)
tar_data = self.tar_file_header(name, length, mtime=mtime)
tar_data += contents
tar_data += self.tar_file_padding(length)
return tar_data
def tar_file_padding(self, length):
""" Returns TAR file padding for file data of the given length. """
if length % 512 != 0:
return '\0' * (512 - (length % 512))
return ''
def tar_file_header(self, name, file_size, mtime=None):
""" Returns TAR file header data for a file with the given name and size. """
info = tarfile.TarInfo(name=name)
info.type = tarfile.REGTYPE
info.size = file_size
if mtime is not None:
info.mtime = mtime
return info.tobuf()
def tar_folder(self, name, mtime=None):
""" Returns TAR file header data for a folder with the given name. """
info = tarfile.TarInfo(name=name)
info.type = tarfile.DIRTYPE
if mtime is not None:
info.mtime = mtime
# allow the directory to be readable by non-root users
info.mode = 0755
return info.tobuf()

image/__init__.py (new file, 103 lines)

@@ -0,0 +1,103 @@
import tarfile
from collections import namedtuple
from util.registry.gzipwrap import GzipWrap
class ManifestJSON(namedtuple('ManifestJSON', ['digest', 'json', 'media_type'])):
"""
ManifestJSON represents a Manifest of any format.
"""
class Repository(namedtuple('Repository', ['id', 'name', 'namespace_name'])):
"""
Repository represents a collection of tags.
"""
class Tag(namedtuple('Tag', ['name', 'repository'])):
"""
Tag represents a user-facing alias for referencing a set of Manifests.
"""
class BlobUpload(namedtuple('BlobUpload', ['uuid', 'byte_count', 'uncompressed_byte_count',
'chunk_count', 'sha_state', 'location_name',
'storage_metadata', 'piece_sha_state', 'piece_hashes'])):
"""
BlobUpload represents the current state of an Blob being uploaded.
"""
class Blob(namedtuple('Blob', ['digest', 'size', 'locations'])):
"""
Blob represents an opaque binary blob saved to the storage system.
"""
class TarImageFormatter(object):
"""
Base class for classes which produce a tar containing image and layer data.
"""
def build_stream(self, namespace, repository, tag, synthetic_image_id, layer_json,
get_image_iterator, get_layer_iterator, get_image_json):
"""
Builds and streams a synthetic .tar.gz that represents the formatted tar created by this class's
implementation.
"""
return GzipWrap(self.stream_generator(namespace, repository, tag,
synthetic_image_id, layer_json,
get_image_iterator, get_layer_iterator,
get_image_json))
def stream_generator(self, namespace, repository, tag, synthetic_image_id,
layer_json, get_image_iterator, get_layer_iterator, get_image_json):
raise NotImplementedError
def tar_file(self, name, contents, mtime=None):
"""
Returns the tar binary representation for a file with the given name and file contents.
"""
length = len(contents)
tar_data = self.tar_file_header(name, length, mtime=mtime)
tar_data += contents
tar_data += self.tar_file_padding(length)
return tar_data
def tar_file_padding(self, length):
"""
Returns tar file padding for file data of the given length.
"""
if length % 512 != 0:
return '\0' * (512 - (length % 512))
return ''
def tar_file_header(self, name, file_size, mtime=None):
"""
Returns tar file header data for a file with the given name and size.
"""
info = tarfile.TarInfo(name=name)
info.type = tarfile.REGTYPE
info.size = file_size
if mtime is not None:
info.mtime = mtime
return info.tobuf()
def tar_folder(self, name, mtime=None):
"""
Returns tar file header data for a folder with the given name.
"""
info = tarfile.TarInfo(name=name)
info.type = tarfile.DIRTYPE
if mtime is not None:
info.mtime = mtime
# allow the directory to be readable by non-root users
info.mode = 0755
return info.tobuf()
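For orientation, here is a minimal hypothetical subclass (not part of this commit) showing how the tar_* helpers above are meant to be composed by a concrete formatter's stream_generator; the two trailing empty 512-byte records mirror what SquashedDockerImageFormatter yields to terminate the tar stream:

class SingleFileFormatter(TarImageFormatter):
  """ Hypothetical formatter: one folder, one file, then the tar terminator. """
  def stream_generator(self, namespace, repository, tag, synthetic_image_id, layer_json,
                       get_image_iterator, get_layer_iterator, get_image_json):
    yield self.tar_folder(synthetic_image_id)
    yield self.tar_file(synthetic_image_id + '/VERSION', '1.0')
    # Two empty 512-byte records mark the end of a tar archive.
    yield '\0' * 512
    yield '\0' * 512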


@@ -6,14 +6,15 @@ from uuid import uuid4
from app import app
from util.registry.streamlayerformat import StreamLayerMerger
-from formats.tarimageformatter import TarImageFormatter
+from image import TarImageFormatter
ACNAME_REGEX = re.compile(r'[^a-z-]+')
-class ACIImage(TarImageFormatter):
-""" Image formatter which produces an ACI-compatible TAR. """
+class AppCImageFormatter(TarImageFormatter):
+"""
+Image formatter which produces an tarball according to the AppC specification.
+"""
def stream_generator(self, namespace, repository, tag, synthetic_image_id,
@@ -40,7 +41,9 @@ class ACIImage(TarImageFormatter):
@staticmethod
def _build_isolators(docker_config):
-""" Builds ACI isolator config from the docker config. """
+"""
+Builds ACI isolator config from the docker config.
+"""
def _isolate_memory(memory):
return {
@@ -107,22 +110,24 @@ class ACIImage(TarImageFormatter):
@staticmethod
def _build_ports(docker_config):
-""" Builds the ports definitions for the ACI. """
+"""
+Builds the ports definitions for the ACI.
+Formats:
+port/tcp
+port/udp
+port
+"""
ports = []
-for docker_port_definition in ACIImage._get_docker_config_value(docker_config, 'Ports', []):
-# Formats:
-# port/tcp
-# port/udp
-# port
+for docker_port in AppCImageFormatter._get_docker_config_value(docker_config, 'Ports', []):
protocol = 'tcp'
port_number = -1
-if '/' in docker_port_definition:
-(port_number, protocol) = docker_port_definition.split('/')
+if '/' in docker_port:
+(port_number, protocol) = docker_port.split('/')
else:
-port_number = docker_port_definition
+port_number = docker_port
try:
port_number = int(port_number)
@@ -149,9 +154,9 @@ class ACIImage(TarImageFormatter):
volumes = []
def get_name(docker_volume_path):
-return "volume-%s" % ACIImage._ac_name(docker_volume_path)
+return "volume-%s" % AppCImageFormatter._ac_name(docker_volume_path)
-for docker_volume_path in ACIImage._get_docker_config_value(docker_config, 'Volumes', []):
+for docker_volume_path in AppCImageFormatter._get_docker_config_value(docker_config, 'Volumes', []):
if not docker_volume_path:
continue
@@ -219,9 +224,9 @@ class ACIImage(TarImageFormatter):
"eventHandlers": [],
"workingDirectory": config.get('WorkingDir', '') or '/',
"environment": [{"name": key, "value": value} for (key, value) in env_vars],
-"isolators": ACIImage._build_isolators(config),
-"mountPoints": ACIImage._build_volumes(config),
-"ports": ACIImage._build_ports(config),
+"isolators": AppCImageFormatter._build_isolators(config),
+"mountPoints": AppCImageFormatter._build_volumes(config),
+"ports": AppCImageFormatter._build_ports(config),
"annotations": [
{"name": "created", "value": docker_layer_data.get('created', '')},
{"name": "homepage", "value": source_url},

image/docker/__init__.py (new file, 10 lines)

@@ -0,0 +1,10 @@
"""
docker implements pure data transformations according to the many Docker specifications.
"""
class DockerException(Exception):
pass
class ManifestException(DockerException):
pass


@@ -1,5 +1,11 @@
-import json
+"""
+schema1 implements pure data transformations according to the Docker Manifest v2.1 Specification.
+https://github.com/docker/distribution/blob/master/docs/spec/manifest-v2-1.md
+"""
import hashlib
+import json
import logging
from collections import namedtuple, OrderedDict
@@ -9,72 +15,72 @@ from jwkest.jws import SIGNER_ALGS, keyrep
from jwt.utils import base64url_encode, base64url_decode
from digest import digest_tools
+from image.docker import ManifestException
+from image.docker.v1 import DockerV1Metadata
logger = logging.getLogger(__name__)
-DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v1+prettyjws'
-DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v2+json'
-DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.list.v2+json'
-DOCKER_SCHEMA2_CONTENT_TYPES = [DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE,
-DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE]
+# Content Types
+DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v1+json'
+DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v1+prettyjws'
+DOCKER_SCHEMA1_CONTENT_TYPES = [DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE,
+DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE]
+# Keys for signature-related data
+DOCKER_SCHEMA1_SIGNATURES_KEY = 'signatures'
+DOCKER_SCHEMA1_HEADER_KEY = 'header'
+DOCKER_SCHEMA1_SIGNATURE_KEY = 'signature'
+DOCKER_SCHEMA1_PROTECTED_KEY = 'protected'
+DOCKER_SCHEMA1_FORMAT_LENGTH_KEY = 'formatLength'
+DOCKER_SCHEMA1_FORMAT_TAIL_KEY = 'formatTail'
+# Keys for manifest-related data
+DOCKER_SCHEMA1_REPO_NAME_KEY = 'name'
+DOCKER_SCHEMA1_REPO_TAG_KEY = 'tag'
+DOCKER_SCHEMA1_ARCH_KEY = 'architecture'
+DOCKER_SCHEMA1_FS_LAYERS_KEY = 'fsLayers'
+DOCKER_SCHEMA1_BLOB_SUM_KEY = 'blobSum'
+DOCKER_SCHEMA1_HISTORY_KEY = 'history'
+DOCKER_SCHEMA1_V1_COMPAT_KEY = 'v1Compatibility'
+DOCKER_SCHEMA1_SCHEMA_VER_KEY = 'schemaVersion'
-# These are used to extract backwards compatiblity data from Docker Manifest Schema 1
-ExtractedLayerMetadata = namedtuple(
-'ExtractedLayerMetadata',
-['digest', 'v1_metadata', 'v1_metadata_str']
-)
-ExtractedDockerV1Metadata = namedtuple(
-'ExtractedDockerV1Metadata',
-['image_id', 'parent_image_id', 'created', 'comment', 'command']
-)
-# Constants used for Docker Manifest Schema 2.1
-_DOCKER_SCHEMA_1_SIGNATURES_KEY = 'signatures'
-_DOCKER_SCHEMA_1_PROTECTED_KEY = 'protected'
-_DOCKER_SCHEMA_1_FORMAT_LENGTH_KEY = 'formatLength'
-_DOCKER_SCHEMA_1_FORMAT_TAIL_KEY = 'formatTail'
-_DOCKER_SCHEMA_1_REPO_NAME_KEY = 'name'
-_DOCKER_SCHEMA_1_REPO_TAG_KEY = 'tag'
-_DOCKER_SCHEMA_1_FS_LAYERS_KEY = 'fsLayers'
-_DOCKER_SCHEMA_1_HISTORY_KEY = 'history'
-_DOCKER_SCHEMA_1_BLOB_SUM_KEY = 'blobSum'
-_DOCKER_SCHEMA_1_V1_COMPAT_KEY = 'v1Compatibility'
-_DOCKER_SCHEMA_1_ARCH_KEY = 'architecture'
-_DOCKER_SCHEMA_1_SCHEMA_VER_KEY = 'schemaVersion'
+# Format for time used in the protected payload.
_ISO_DATETIME_FORMAT_ZULU = '%Y-%m-%dT%H:%M:%SZ'
-_JWS_ALGORITHM = 'RS256'
+# The algorithm we use to sign the JWS.
+_JWS_SIGNING_ALGORITHM = 'RS256'
-class ManifestException(Exception):
+class MalformedSchema1Manifest(ManifestException):
+"""
+Raised when a manifest fails an assertion that should be true according to the Docker Manifest
+v2.1 Specification.
+"""
pass
-class ManifestMalformed(ManifestException):
+class InvalidSchema1Signature(ManifestException):
+"""
+Raised when there is a failure verifying the signature of a signed Docker 2.1 Manifest.
+"""
pass
-class ManifestSignatureFailure(ManifestException):
-pass
+class Schema1Layer(namedtuple('Schema1Layer', ['digest', 'v1_metadata', 'raw_v1_metadata'])):
+"""
+Represents all of the data about an individual layer in a given Manifest.
+This is the union of the fsLayers (digest) and the history entries (v1_compatibility).
+"""
-def _updated_v1_metadata(v1_metadata_json, updated_id_map):
-parsed = json.loads(v1_metadata_json)
-parsed['id'] = updated_id_map[parsed['id']]
-if parsed.get('parent') and parsed['parent'] in updated_id_map:
-parsed['parent'] = updated_id_map[parsed['parent']]
-if parsed.get('container_config', {}).get('Image'):
-existing_image = parsed['container_config']['Image']
-if existing_image in updated_id_map:
-parsed['container_config']['image'] = updated_id_map[existing_image]
-return json.dumps(parsed)
+class Schema1V1Metadata(namedtuple('Schema1V1Metadata', ['image_id', 'parent_image_id', 'created',
+'comment', 'command'])):
+"""
+Represents the necessary data extracted from the v1 compatibility string in a given layer of a
+Manifest.
+"""
class DockerSchema1Manifest(object):
@@ -83,17 +89,18 @@ class DockerSchema1Manifest(object):
self._bytes = manifest_bytes
self._parsed = json.loads(manifest_bytes)
-self._signatures = self._parsed[_DOCKER_SCHEMA_1_SIGNATURES_KEY]
-self._tag = self._parsed[_DOCKER_SCHEMA_1_REPO_TAG_KEY]
+self._signatures = self._parsed[DOCKER_SCHEMA1_SIGNATURES_KEY]
+self._tag = self._parsed[DOCKER_SCHEMA1_REPO_TAG_KEY]
-repo_name_tuple = self._parsed[_DOCKER_SCHEMA_1_REPO_NAME_KEY].split('/')
+repo_name = self._parsed[DOCKER_SCHEMA1_REPO_NAME_KEY]
+repo_name_tuple = repo_name.split('/')
if len(repo_name_tuple) > 1:
self._namespace, self._repo_name = repo_name_tuple
elif len(repo_name_tuple) == 1:
self._namespace = ''
self._repo_name = repo_name_tuple[0]
else:
-raise ManifestMalformed('malformed repository name')
+raise MalformedSchema1Manifest('malformed repository name: %s' % repo_name)
if validate:
self._validate()
@@ -108,7 +115,11 @@ class DockerSchema1Manifest(object):
sig = base64url_decode(signature['signature'].encode('utf-8'))
verified = signer.verify(bytes_to_verify, sig, gk)
if not verified:
-raise ManifestSignatureFailure()
+raise InvalidSchema1Signature()
+@property
+def content_type(self):
+return DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE
@property
def signatures(self):
@@ -151,6 +162,10 @@ class DockerSchema1Manifest(object):
def checksums(self):
return list({str(mdata.digest) for mdata in self.layers})
+@property
+def leaf_layer(self):
+return self.layers[-1]
@property
def layers(self):
if self._layers is None:
@@ -158,38 +173,39 @@ class DockerSchema1Manifest(object):
return self._layers
def _generate_layers(self):
-""" Returns a generator of objects that have the blobSum and v1Compatibility keys in them,
+"""
+Returns a generator of objects that have the blobSum and v1Compatibility keys in them,
starting from the base image and working toward the leaf node.
"""
-for blob_sum_obj, history_obj in reversed(zip(self._parsed[_DOCKER_SCHEMA_1_FS_LAYERS_KEY],
-self._parsed[_DOCKER_SCHEMA_1_HISTORY_KEY])):
+for blob_sum_obj, history_obj in reversed(zip(self._parsed[DOCKER_SCHEMA1_FS_LAYERS_KEY],
+self._parsed[DOCKER_SCHEMA1_HISTORY_KEY])):
try:
-image_digest = digest_tools.Digest.parse_digest(blob_sum_obj[_DOCKER_SCHEMA_1_BLOB_SUM_KEY])
+image_digest = digest_tools.Digest.parse_digest(blob_sum_obj[DOCKER_SCHEMA1_BLOB_SUM_KEY])
except digest_tools.InvalidDigestException:
-raise ManifestMalformed('could not parse manifest digest: %s' %
-blob_sum_obj[_DOCKER_SCHEMA_1_BLOB_SUM_KEY])
+raise MalformedSchema1Manifest('could not parse manifest digest: %s' %
+blob_sum_obj[DOCKER_SCHEMA1_BLOB_SUM_KEY])
-metadata_string = history_obj[_DOCKER_SCHEMA_1_V1_COMPAT_KEY]
+metadata_string = history_obj[DOCKER_SCHEMA1_V1_COMPAT_KEY]
v1_metadata = json.loads(metadata_string)
command_list = v1_metadata.get('container_config', {}).get('Cmd', None)
command = json.dumps(command_list) if command_list else None
if not 'id' in v1_metadata:
-raise ManifestMalformed('invalid manifest v1 history')
+raise MalformedSchema1Manifest('id field missing from v1Compatibility JSON')
-extracted = ExtractedDockerV1Metadata(v1_metadata['id'], v1_metadata.get('parent'),
+extracted = Schema1V1Metadata(v1_metadata['id'], v1_metadata.get('parent'),
v1_metadata.get('created'), v1_metadata.get('comment'),
command)
-yield ExtractedLayerMetadata(image_digest, extracted, metadata_string)
+yield Schema1Layer(image_digest, extracted, metadata_string)
@property
def payload(self):
-protected = str(self._signatures[0][_DOCKER_SCHEMA_1_PROTECTED_KEY])
+protected = str(self._signatures[0][DOCKER_SCHEMA1_PROTECTED_KEY])
parsed_protected = json.loads(base64url_decode(protected))
-signed_content_head = self._bytes[:parsed_protected[_DOCKER_SCHEMA_1_FORMAT_LENGTH_KEY]]
-signed_content_tail = base64url_decode(str(parsed_protected[_DOCKER_SCHEMA_1_FORMAT_TAIL_KEY]))
+signed_content_head = self._bytes[:parsed_protected[DOCKER_SCHEMA1_FORMAT_LENGTH_KEY]]
+signed_content_tail = base64url_decode(str(parsed_protected[DOCKER_SCHEMA1_FORMAT_TAIL_KEY]))
return signed_content_head + signed_content_tail
def rewrite_invalid_image_ids(self, images_map):
@@ -205,15 +221,15 @@ class DockerSchema1Manifest(object):
has_rewritten_ids = False
updated_id_map = {}
-for extracted_layer_metadata in self.layers:
-digest_str = str(extracted_layer_metadata.digest)
-extracted_v1_metadata = extracted_layer_metadata.v1_metadata
+for layer in self.layers:
+digest_str = str(layer.digest)
+extracted_v1_metadata = layer.v1_metadata
working_image_id = extracted_v1_metadata.image_id
# Update our digest_history hash for the new layer data.
digest_history.update(digest_str)
digest_history.update("@")
-digest_history.update(extracted_layer_metadata.v1_metadata_str.encode('utf-8'))
+digest_history.update(layer.raw_v1_metadata.encode('utf-8'))
digest_history.update("|")
# Ensure that the v1 image's storage matches the V2 blob. If not, we've
@@ -233,12 +249,11 @@ class DockerSchema1Manifest(object):
if extracted_v1_metadata.parent_image_id is not None:
parent_image_id = images_map.get(extracted_v1_metadata.parent_image_id, None)
if parent_image_id is None:
-raise ManifestMalformed(
-'Parent not found with image ID: {0}'.format(extracted_v1_metadata.parent_image_id)
-)
+raise MalformedSchema1Manifest('parent not found with image ID: %s' %
+extracted_v1_metadata.parent_image_id)
# Synthesize and store the v1 metadata in the db.
-v1_metadata_json = extracted_layer_metadata.v1_metadata_str
+v1_metadata_json = layer.raw_v1_metadata
if has_rewritten_ids:
v1_metadata_json = _updated_v1_metadata(v1_metadata_json, updated_id_map)
@@ -253,17 +268,19 @@ class DockerSchema1Manifest(object):
class DockerSchema1ManifestBuilder(object):
-""" Class which represents a manifest which is currently being built. """
+"""
+A convenient abstraction around creating new DockerSchema1Manifests.
+"""
def __init__(self, namespace_name, repo_name, tag, architecture='amd64'):
repo_name_key = '{0}/{1}'.format(namespace_name, repo_name)
if namespace_name == '':
repo_name_key = repo_name
self._base_payload = {
-_DOCKER_SCHEMA_1_REPO_TAG_KEY: tag,
-_DOCKER_SCHEMA_1_REPO_NAME_KEY: repo_name_key,
-_DOCKER_SCHEMA_1_ARCH_KEY: architecture,
-_DOCKER_SCHEMA_1_SCHEMA_VER_KEY: 1,
+DOCKER_SCHEMA1_REPO_TAG_KEY: tag,
+DOCKER_SCHEMA1_REPO_NAME_KEY: repo_name_key,
+DOCKER_SCHEMA1_ARCH_KEY: architecture,
+DOCKER_SCHEMA1_SCHEMA_VER_KEY: 1,
}
self._fs_layer_digests = []
@@ -271,21 +288,22 @@ class DockerSchema1ManifestBuilder(object):
def add_layer(self, layer_digest, v1_json_metadata):
self._fs_layer_digests.append({
-_DOCKER_SCHEMA_1_BLOB_SUM_KEY: layer_digest,
+DOCKER_SCHEMA1_BLOB_SUM_KEY: layer_digest,
})
self._history.append({
-_DOCKER_SCHEMA_1_V1_COMPAT_KEY: v1_json_metadata,
+DOCKER_SCHEMA1_V1_COMPAT_KEY: v1_json_metadata,
})
return self
def build(self, json_web_key):
-""" Build the payload and sign it, returning a SignedManifest object.
+"""
+Builds a DockerSchema1Manifest object complete with signature.
"""
payload = OrderedDict(self._base_payload)
payload.update({
-_DOCKER_SCHEMA_1_HISTORY_KEY: self._history,
-_DOCKER_SCHEMA_1_FS_LAYERS_KEY: self._fs_layer_digests,
+DOCKER_SCHEMA1_HISTORY_KEY: self._history,
+DOCKER_SCHEMA1_FS_LAYERS_KEY: self._fs_layer_digests,
})
payload_str = json.dumps(payload, indent=3)
@@ -302,7 +320,7 @@ class DockerSchema1ManifestBuilder(object):
bytes_to_sign = '{0}.{1}'.format(protected, base64url_encode(payload_str))
-signer = SIGNER_ALGS[_JWS_ALGORITHM]
+signer = SIGNER_ALGS[_JWS_SIGNING_ALGORITHM]
signature = base64url_encode(signer.sign(bytes_to_sign, json_web_key.get_key()))
logger.debug('Generated signature: %s', signature)
@@ -311,48 +329,31 @@ class DockerSchema1ManifestBuilder(object):
if comp in public_members}
signature_block = {
-'header': {
-'jwk': public_key,
-'alg': _JWS_ALGORITHM,
-},
-'signature': signature,
-_DOCKER_SCHEMA_1_PROTECTED_KEY: protected,
+DOCKER_SCHEMA1_HEADER_KEY: {'jwk': public_key, 'alg': _JWS_SIGNING_ALGORITHM},
+DOCKER_SCHEMA1_SIGNATURE_KEY: signature,
+DOCKER_SCHEMA1_PROTECTED_KEY: protected,
}
logger.debug('Encoded signature block: %s', json.dumps(signature_block))
-payload.update({
-_DOCKER_SCHEMA_1_SIGNATURES_KEY: [signature_block],
-})
+payload.update({DOCKER_SCHEMA1_SIGNATURES_KEY: [signature_block]})
return DockerSchema1Manifest(json.dumps(payload, indent=3))
-Repository = namedtuple('Repository', ['id', 'name', 'namespace_name'])
-Tag = namedtuple('Tag', ['name', 'repository'])
-ManifestJSON = namedtuple('ManifestJSON', ['digest', 'json'])
-DockerV1Metadata = namedtuple('DockerV1Metadata', ['namespace_name',
-'repo_name',
-'image_id',
-'checksum',
-'content_checksum',
-'created',
-'comment',
-'command',
-'parent_image_id',
-'compat_json'])
-BlobUpload = namedtuple('BlobUpload', ['uuid',
-'byte_count',
-'uncompressed_byte_count',
-'chunk_count',
-'sha_state',
-'location_name',
-'storage_metadata',
-'piece_sha_state',
-'piece_hashes'])
-Blob = namedtuple('Blob', ['digest', 'size', 'locations'])
+def _updated_v1_metadata(v1_metadata_json, updated_id_map):
+"""
+Updates v1_metadata with new image IDs.
+"""
+parsed = json.loads(v1_metadata_json)
+parsed['id'] = updated_id_map[parsed['id']]
+if parsed.get('parent') and parsed['parent'] in updated_id_map:
+parsed['parent'] = updated_id_map[parsed['parent']]
+if parsed.get('container_config', {}).get('Image'):
+existing_image = parsed['container_config']['Image']
+if existing_image in updated_id_map:
+parsed['container_config']['image'] = updated_id_map[existing_image]
+return json.dumps(parsed)

image/docker/schema2.py (new file, 11 lines)

@@ -0,0 +1,11 @@
"""
schema2 implements pure data transformations according to the Docker Manifest v2.2 Specification.
https://github.com/docker/distribution/blob/master/docs/spec/manifest-v2-2.md
"""
# Content Types
DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v2+json'
DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.list.v2+json'
DOCKER_SCHEMA2_CONTENT_TYPES = [DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE,
DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE]
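endpoints/v2/manifest.py imports DOCKER_SCHEMA2_CONTENT_TYPES above. A hedged sketch (hypothetical helper, not code from this commit) of the kind of media-type check such a constant list supports:

from image.docker.schema2 import DOCKER_SCHEMA2_CONTENT_TYPES

def is_schema2_media_type(content_type):
  # True for the manifest or manifest-list media types from the v2.2 spec; a
  # registry that only speaks schema 1 could use this to reject such payloads.
  return content_type in DOCKER_SCHEMA2_CONTENT_TYPES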


@@ -1,30 +1,31 @@
-from app import app
-from util.registry.gzipwrap import GZIP_BUFFER_SIZE
-from util.registry.streamlayerformat import StreamLayerMerger
-from formats.tarimageformatter import TarImageFormatter
import copy
import json
import math
import calendar
+from app import app
+from image import TarImageFormatter
+from util.registry.gzipwrap import GZIP_BUFFER_SIZE
+from util.registry.streamlayerformat import StreamLayerMerger
class FileEstimationException(Exception):
-""" Exception raised by build_docker_load_stream if the estimated size of the layer TAR
-was lower than the actual size. This means the sent TAR header is wrong, and we have
-to fail.
+"""
+Exception raised by build_docker_load_stream if the estimated size of the layer tar was lower
+than the actual size. This means the sent tar header is wrong, and we have to fail.
"""
pass
-class SquashedDockerImage(TarImageFormatter):
-""" Image formatter which produces a squashed image compatible with the `docker load`
-command.
+class SquashedDockerImageFormatter(TarImageFormatter):
+"""
+Image formatter which produces a squashed image compatible with the `docker load` command.
"""
-# Multiplier against the image size reported by Docker to account for the TAR metadata.
+# Multiplier against the image size reported by Docker to account for the tar metadata.
# Note: This multiplier was not formally calculated in anyway and should be adjusted overtime
# if/when we encounter issues with it. Unfortunately, we cannot make it too large or the Docker
-# daemon dies when trying to load the entire TAR into memory.
+# daemon dies when trying to load the entire tar into memory.
SIZE_MULTIPLIER = 1.2
def stream_generator(self, namespace, repository, tag, synthetic_image_id,
@@ -39,7 +40,7 @@ class SquashedDockerImage(TarImageFormatter):
# repositories - JSON file containing a repo -> tag -> image map
# {image ID folder}:
# json - The layer JSON
-# layer.tar - The TARed contents of the layer
+# layer.tar - The tared contents of the layer
# VERSION - The docker import version: '1.0'
layer_merger = StreamLayerMerger(get_layer_iterator)
@@ -57,7 +58,7 @@ class SquashedDockerImage(TarImageFormatter):
yield self.tar_folder(synthetic_image_id, mtime=image_mtime)
# Yield the JSON layer data.
-layer_json = SquashedDockerImage._build_layer_json(layer_json, synthetic_image_id)
+layer_json = SquashedDockerImageFormatter._build_layer_json(layer_json, synthetic_image_id)
yield self.tar_file(synthetic_image_id + '/json', json.dumps(layer_json), mtime=image_mtime)
# Yield the VERSION file.
@@ -73,7 +74,7 @@ class SquashedDockerImage(TarImageFormatter):
estimated_file_size += image.storage.uncompressed_size
else:
image_json = get_image_json(image)
-estimated_file_size += image_json.get('Size', 0) * SquashedDockerImage.SIZE_MULTIPLIER
+estimated_file_size += image_json.get('Size', 0) * SquashedDockerImageFormatter.SIZE_MULTIPLIER
# Make sure the estimated file size is an integer number of bytes.
estimated_file_size = int(math.ceil(estimated_file_size))
@@ -105,7 +106,7 @@ class SquashedDockerImage(TarImageFormatter):
# Yield any file padding to 512 bytes that is necessary.
yield self.tar_file_padding(estimated_file_size)
-# Last two records are empty in TAR spec.
+# Last two records are empty in tar spec.
yield '\0' * 512
yield '\0' * 512

image/docker/v1.py (new file, 16 lines)

@@ -0,0 +1,16 @@
"""
v1 implements pure data transformations according to the Docker Image Specification v1.1.
https://github.com/docker/docker/blob/master/image/spec/v1.1.md
"""
from collections import namedtuple
class DockerV1Metadata(namedtuple('DockerV1Metadata',
['namespace_name', 'repo_name', 'image_id', 'checksum',
'content_checksum', 'created', 'comment', 'command',
'parent_image_id', 'compat_json'])):
"""
DockerV1Metadata represents all of the metadata for a given Docker v1 Image.
The original form of the metadata is stored in the compat_json field.
"""