mv data/types image

This change also merges formats into the new image module.
This commit is contained in:
Jimmy Zelinskie 2016-08-02 18:45:30 -04:00
parent a516c08deb
commit 32a6c22b43
14 changed files with 342 additions and 258 deletions

103
image/__init__.py Normal file
View file

@ -0,0 +1,103 @@
import tarfile
from collections import namedtuple
from util.registry.gzipwrap import GzipWrap
class ManifestJSON(namedtuple('ManifestJSON', 'digest json media_type')):
    """
    ManifestJSON is an immutable record describing a Manifest of any format.

    Fields, in positional order: digest, json, media_type.
    """
class Repository(namedtuple('Repository', 'id name namespace_name')):
    """
    Repository is an immutable record identifying a collection of tags.

    Fields, in positional order: id, name, namespace_name.
    """
class Tag(namedtuple('Tag', 'name repository')):
    """
    Tag is an immutable record for a user-facing alias that references a set of Manifests.

    Fields, in positional order: name, repository.
    """
class BlobUpload(namedtuple('BlobUpload', [
        'uuid',
        'byte_count',
        'uncompressed_byte_count',
        'chunk_count',
        'sha_state',
        'location_name',
        'storage_metadata',
        'piece_sha_state',
        'piece_hashes',
])):
    """
    BlobUpload is an immutable record holding the current state of a Blob being uploaded.
    """
class Blob(namedtuple('Blob', 'digest size locations')):
    """
    Blob is an immutable record for an opaque binary blob saved to the storage system.

    Fields, in positional order: digest, size, locations.
    """
class TarImageFormatter(object):
    """
    Base class for classes which produce a tar containing image and layer data.

    Subclasses implement `stream_generator`; the `tar_*` helpers build the raw tar records
    (headers, data and padding) that implementations yield.
    """

    def build_stream(self, namespace, repository, tag, synthetic_image_id, layer_json,
                     get_image_iterator, get_layer_iterator, get_image_json):
        """
        Builds and streams a synthetic .tar.gz that represents the formatted tar created by this
        class's implementation.

        All arguments are forwarded unchanged to `stream_generator`; the resulting generator is
        wrapped in a GzipWrap, which is returned.
        """
        return GzipWrap(self.stream_generator(namespace, repository, tag,
                                              synthetic_image_id, layer_json,
                                              get_image_iterator, get_layer_iterator,
                                              get_image_json))

    def stream_generator(self, namespace, repository, tag, synthetic_image_id,
                         layer_json, get_image_iterator, get_layer_iterator, get_image_json):
        """
        Generates the pieces of the (uncompressed) tar. Must be overridden by subclasses; the
        base implementation always raises NotImplementedError.
        """
        raise NotImplementedError

    def tar_file(self, name, contents, mtime=None):
        """
        Returns the tar binary representation for a file with the given name and file contents:
        the header record, the contents, then the padding up to the next 512-byte boundary.
        """
        length = len(contents)
        tar_data = self.tar_file_header(name, length, mtime=mtime)
        tar_data += contents
        tar_data += self.tar_file_padding(length)
        return tar_data

    def tar_file_padding(self, length):
        """
        Returns tar file padding for file data of the given length: the NUL bytes needed to
        reach the next multiple of 512, or an empty string if the length is already aligned.
        """
        # Byte literals are plain `str` on Python 2 and `bytes` on Python 3, so the padding can
        # always be concatenated with the header produced by TarInfo.tobuf().
        remainder = length % 512
        if remainder != 0:
            return b'\0' * (512 - remainder)

        return b''

    def tar_file_header(self, name, file_size, mtime=None):
        """
        Returns tar file header data for a regular file with the given name and size. The
        TarInfo default mtime is kept unless `mtime` is given.
        """
        info = tarfile.TarInfo(name=name)
        info.type = tarfile.REGTYPE
        info.size = file_size

        if mtime is not None:
            info.mtime = mtime
        return info.tobuf()

    def tar_folder(self, name, mtime=None):
        """
        Returns tar file header data for a folder with the given name. The TarInfo default mtime
        is kept unless `mtime` is given.
        """
        info = tarfile.TarInfo(name=name)
        info.type = tarfile.DIRTYPE

        if mtime is not None:
            info.mtime = mtime

        # Allow the directory to be readable by non-root users. `0o755` is the octal spelling
        # accepted by both Python 2.6+ and Python 3 (the bare `0755` form is Python 2 only).
        info.mode = 0o755
        return info.tobuf()

238
image/appc/__init__.py Normal file
View file

@ -0,0 +1,238 @@
import json
import re
import calendar
from uuid import uuid4
from app import app
from util.registry.streamlayerformat import StreamLayerMerger
from image import TarImageFormatter
# Matches every run of characters other than lowercase ASCII letters and '-'. Used by
# AppCImageFormatter._ac_name to replace such runs with a single '-'.
ACNAME_REGEX = re.compile(r'[^a-z-]+')
class AppCImageFormatter(TarImageFormatter):
    """
    Image formatter which produces a tarball according to the AppC specification.
    """

    def stream_generator(self, namespace, repository, tag, synthetic_image_id,
                         layer_json, get_image_iterator, get_layer_iterator, get_image_json):
        """
        Yields the pieces of the ACI tar: the manifest file, the `rootfs` folder header and then
        every entry produced by merging the layers under the `rootfs/` prefix.

        The mtime of the generated entries comes from the `created` attribute of the first image
        returned by `get_image_iterator()`, or 0 when that attribute is None.
        """
        image_mtime = 0
        created = next(get_image_iterator()).created
        if created is not None:
            image_mtime = calendar.timegm(created.utctimetuple())

        # ACI Format (.tar):
        #   manifest - The JSON manifest
        #   rootfs - The root file system

        # Yield the manifest.
        manifest = self._build_manifest(namespace, repository, tag, layer_json,
                                        synthetic_image_id)
        yield self.tar_file('manifest', manifest, mtime=image_mtime)

        # Yield the merged layer data.
        yield self.tar_folder('rootfs', mtime=image_mtime)

        layer_merger = StreamLayerMerger(get_layer_iterator, path_prefix='rootfs/')
        for entry in layer_merger.get_generator():
            yield entry

    @staticmethod
    def _build_isolators(docker_config):
        """
        Builds ACI isolator config from the docker config.

        Returns a list with one isolator dict for each supported docker config key that has a
        truthy value, in the fixed order Memory, MemorySwap, CpuShares, Cpuset.
        """
        def _isolate_memory(memory):
            return {
                "name": "memory/limit",
                "value": {
                    "request": str(memory) + 'B',
                }
            }

        def _isolate_swap(memory):
            return {
                "name": "memory/swap",
                "value": {
                    "request": str(memory) + 'B',
                }
            }

        def _isolate_cpu(cpu):
            return {
                "name": "cpu/shares",
                "value": {
                    "request": str(cpu),
                }
            }

        def _isolate_capabilities(capabilities_set_value):
            capabilities_set = re.split(r'[\s,]', capabilities_set_value)
            return {
                "name": "os/linux/capabilities-retain-set",
                "value": {
                    "set": capabilities_set,
                }
            }

        # Ordered pairs (rather than a dict) so the emitted isolator list is deterministic.
        mappers = (
            ('Memory', _isolate_memory),
            ('MemorySwap', _isolate_swap),
            ('CpuShares', _isolate_cpu),
            # NOTE(review): Cpuset is rendered as a capabilities-retain-set isolator, which looks
            # surprising -- confirm this mapping is intended.
            ('Cpuset', _isolate_capabilities),
        )

        isolators = []
        for config_key, build_isolator in mappers:
            value = docker_config.get(config_key)
            if value:
                isolators.append(build_isolator(value))

        return isolators

    @staticmethod
    def _get_docker_config_value(docker_config, key, default_value):
        """
        Looks up `key` in the docker config, trying the key as given and then its lowercase
        form. The first non-None value found wins; if that value is falsy, or nothing is found,
        `default_value` is returned.
        """
        for candidate in (key, key.lower()):
            result = docker_config.get(candidate)
            if result is not None:
                return result or default_value

        return default_value

    @staticmethod
    def _build_ports(docker_config):
        """
        Builds the ports definitions for the ACI.

        Formats:
          port/tcp
          port/udp
          port

        Entries that do not match one of these formats (non-numeric port, or more than one '/')
        are skipped.
        """
        docker_ports = AppCImageFormatter._get_docker_config_value(docker_config, 'Ports', [])

        ports = []
        for docker_port in docker_ports:
            protocol = 'tcp'
            port_number = docker_port

            try:
                # Both the unpacking and the int() conversion raise ValueError on malformed
                # entries; keeping them in one `try` makes every malformed entry a skip.
                if '/' in docker_port:
                    (port_number, protocol) = docker_port.split('/')
                port_number = int(port_number)
            except ValueError:
                continue

            ports.append({
                "name": "port-%s" % port_number,
                "port": port_number,
                "protocol": protocol,
            })

        return ports

    @staticmethod
    def _ac_name(value):
        """
        Returns `value` lowercased, with every run of characters matched by ACNAME_REGEX replaced
        by '-' and leading/trailing '-' stripped. If nothing is left, a random UUID string is
        returned instead.
        """
        sanitized = ACNAME_REGEX.sub('-', value.lower()).strip('-')
        if sanitized == '':
            return str(uuid4())
        return sanitized

    @staticmethod
    def _build_volumes(docker_config):
        """ Builds the volumes definitions for the ACI. """
        volumes = []

        def get_name(docker_volume_path):
            return "volume-%s" % AppCImageFormatter._ac_name(docker_volume_path)

        docker_volumes = AppCImageFormatter._get_docker_config_value(docker_config, 'Volumes', [])
        for docker_volume_path in docker_volumes:
            if not docker_volume_path:
                continue

            volumes.append({
                "name": get_name(docker_volume_path),
                "path": docker_volume_path,
                "readOnly": False,
            })
        return volumes

    @staticmethod
    def _build_manifest(namespace, repository, tag, docker_layer_data, synthetic_image_id):
        """
        Builds an ACI manifest from the docker layer data and returns it serialized as a JSON
        string.
        """
        config = docker_layer_data.get('config', {})

        source_url = "%s://%s/%s/%s:%s" % (app.config['PREFERRED_URL_SCHEME'],
                                           app.config['SERVER_HOSTNAME'],
                                           namespace, repository, tag)

        # ACI requires that the execution command be absolutely referenced. Therefore, if we find
        # a relative command, we give it as an argument to /bin/sh to resolve and execute for us.
        entrypoint = config.get('Entrypoint', []) or []
        exec_path = entrypoint + (config.get('Cmd', []) or [])
        if exec_path and not exec_path[0].startswith('/'):
            exec_path = ['/bin/sh', '-c', '""%s""' % ' '.join(exec_path)]

        # TODO(jschorr): ACI doesn't support : in the name, so remove any ports.
        hostname = app.config['SERVER_HOSTNAME']
        hostname = hostname.split(':', 1)[0]

        # Calculate the environment variables. Entries are `NAME=value`; only the first '=' is
        # the separator, so values that themselves contain '=' are preserved. Entries without
        # any '=' are skipped.
        docker_env_vars = config.get('Env', []) or []
        env_vars = []
        for var in docker_env_vars:
            key, separator, value = var.partition('=')
            if not separator:
                continue
            env_vars.append((key, value))

        manifest = {
            "acKind": "ImageManifest",
            "acVersion": "0.6.1",
            "name": '%s/%s/%s' % (hostname.lower(), namespace.lower(), repository.lower()),
            "labels": [
                {
                    "name": "version",
                    "value": tag,
                },
                {
                    "name": "arch",
                    "value": docker_layer_data.get('architecture', 'amd64')
                },
                {
                    "name": "os",
                    "value": docker_layer_data.get('os', 'linux')
                }
            ],
            "app": {
                "exec": exec_path,
                # Below, `or 'root'` is required to replace empty string from Dockerfiles.
                "user": config.get('User', '') or 'root',
                "group": config.get('Group', '') or 'root',
                "eventHandlers": [],
                "workingDirectory": config.get('WorkingDir', '') or '/',
                "environment": [{"name": key, "value": value} for (key, value) in env_vars],
                "isolators": AppCImageFormatter._build_isolators(config),
                "mountPoints": AppCImageFormatter._build_volumes(config),
                "ports": AppCImageFormatter._build_ports(config),
                "annotations": [
                    {"name": "created", "value": docker_layer_data.get('created', '')},
                    {"name": "homepage", "value": source_url},
                    {"name": "quay.io/derived-image", "value": synthetic_image_id},
                ]
            },
        }

        return json.dumps(manifest)

10
image/docker/__init__.py Normal file
View file

@ -0,0 +1,10 @@
"""
docker implements pure data transformations according to the many Docker specifications.
"""
class DockerException(Exception):
    """ Base class for the exceptions declared by the docker image modules. """
class ManifestException(DockerException):
    """ DockerException subclass used as the base for manifest-related errors. """

359
image/docker/schema1.py Normal file
View file

@ -0,0 +1,359 @@
"""
schema1 implements pure data transformations according to the Docker Manifest v2.1 Specification.
https://github.com/docker/distribution/blob/master/docs/spec/manifest-v2-1.md
"""
import hashlib
import json
import logging
from collections import namedtuple, OrderedDict
from datetime import datetime
from jwkest.jws import SIGNER_ALGS, keyrep
from jwt.utils import base64url_encode, base64url_decode
from digest import digest_tools
from image.docker import ManifestException
from image.docker.v1 import DockerV1Metadata
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Content Types
# Media types of the unsigned and the signed (prettyjws) forms of a schema 1 manifest.
DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v1+json'
DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v1+prettyjws'
DOCKER_SCHEMA1_CONTENT_TYPES = [DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE,
                                DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE]

# Keys for signature-related data
# `signatures` is a top-level manifest key; `header`, `signature` and `protected` are the keys of
# each signature block; `formatLength` and `formatTail` are read from the decoded `protected`
# value.
DOCKER_SCHEMA1_SIGNATURES_KEY = 'signatures'
DOCKER_SCHEMA1_HEADER_KEY = 'header'
DOCKER_SCHEMA1_SIGNATURE_KEY = 'signature'
DOCKER_SCHEMA1_PROTECTED_KEY = 'protected'
DOCKER_SCHEMA1_FORMAT_LENGTH_KEY = 'formatLength'
DOCKER_SCHEMA1_FORMAT_TAIL_KEY = 'formatTail'

# Keys for manifest-related data
DOCKER_SCHEMA1_REPO_NAME_KEY = 'name'
DOCKER_SCHEMA1_REPO_TAG_KEY = 'tag'
DOCKER_SCHEMA1_ARCH_KEY = 'architecture'
DOCKER_SCHEMA1_FS_LAYERS_KEY = 'fsLayers'
DOCKER_SCHEMA1_BLOB_SUM_KEY = 'blobSum'
DOCKER_SCHEMA1_HISTORY_KEY = 'history'
DOCKER_SCHEMA1_V1_COMPAT_KEY = 'v1Compatibility'
DOCKER_SCHEMA1_SCHEMA_VER_KEY = 'schemaVersion'

# Format for time used in the protected payload.
_ISO_DATETIME_FORMAT_ZULU = '%Y-%m-%dT%H:%M:%SZ'

# The algorithm we use to sign the JWS.
_JWS_SIGNING_ALGORITHM = 'RS256'
class MalformedSchema1Manifest(ManifestException):
    """
    Error signalling that a manifest violates a requirement of the Docker Manifest v2.1
    Specification.
    """
class InvalidSchema1Signature(ManifestException):
    """
    Error signalling that the signature of a signed Docker 2.1 Manifest could not be verified.
    """
class Schema1Layer(namedtuple('Schema1Layer', 'digest v1_metadata raw_v1_metadata')):
    """
    Immutable record with all of the data about one layer of a Manifest.

    It combines the layer's fsLayers entry (digest) with its history entry (v1_compatibility),
    the latter both in extracted form (v1_metadata) and as the original string (raw_v1_metadata).
    """
class Schema1V1Metadata(namedtuple('Schema1V1Metadata',
                                   'image_id parent_image_id created comment command')):
    """
    Immutable record with the data needed from the v1 compatibility string of one layer of a
    Manifest.
    """
class DockerSchema1Manifest(object):
    """
    Parsed, read-only view of a signed Docker schema 1 manifest.

    The manifest is built from its serialized form; the namespace, repository name, tag and
    signatures are extracted eagerly, while the layers are parsed lazily on first access.
    """

    def __init__(self, manifest_bytes, validate=True):
        """
        Parses `manifest_bytes` as JSON and extracts the signatures, tag and repository name.

        Raises MalformedSchema1Manifest if the repository name is not of the form `repo` or
        `namespace/repo`. If `validate` is true, the signatures are verified as well, which may
        raise InvalidSchema1Signature.
        """
        self._layers = None
        self._bytes = manifest_bytes

        self._parsed = json.loads(manifest_bytes)
        self._signatures = self._parsed[DOCKER_SCHEMA1_SIGNATURES_KEY]
        self._tag = self._parsed[DOCKER_SCHEMA1_REPO_TAG_KEY]

        repo_name = self._parsed[DOCKER_SCHEMA1_REPO_NAME_KEY]
        repo_name_tuple = repo_name.split('/')
        if len(repo_name_tuple) == 2:
            self._namespace, self._repo_name = repo_name_tuple
        elif len(repo_name_tuple) == 1:
            self._namespace = ''
            self._repo_name = repo_name_tuple[0]
        else:
            # Three or more components cannot be unpacked into (namespace, repo).
            raise MalformedSchema1Manifest('malformed repository name: %s' % repo_name)

        if validate:
            self._validate()

    def _validate(self):
        """
        Verifies every signature block against the signed payload, raising
        InvalidSchema1Signature on the first one that does not verify.
        """
        for signature in self._signatures:
            bytes_to_verify = '{0}.{1}'.format(signature[DOCKER_SCHEMA1_PROTECTED_KEY],
                                               base64url_encode(self.payload))
            header = signature[DOCKER_SCHEMA1_HEADER_KEY]
            signer = SIGNER_ALGS[header['alg']]
            key = keyrep(header['jwk'])
            gk = key.get_key()
            sig = base64url_decode(signature[DOCKER_SCHEMA1_SIGNATURE_KEY].encode('utf-8'))
            verified = signer.verify(bytes_to_verify, sig, gk)
            if not verified:
                raise InvalidSchema1Signature()

    @property
    def content_type(self):
        return DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE

    @property
    def signatures(self):
        return self._signatures

    @property
    def namespace(self):
        return self._namespace

    @property
    def repo_name(self):
        return self._repo_name

    @property
    def tag(self):
        return self._tag

    @property
    def bytes(self):
        return self._bytes

    @property
    def manifest_json(self):
        return self._parsed

    @property
    def digest(self):
        return digest_tools.sha256_digest(self.payload)

    @property
    def image_ids(self):
        return {mdata.v1_metadata.image_id for mdata in self.layers}

    @property
    def parent_image_ids(self):
        return {mdata.v1_metadata.parent_image_id for mdata in self.layers
                if mdata.v1_metadata.parent_image_id}

    @property
    def checksums(self):
        return list({str(mdata.digest) for mdata in self.layers})

    @property
    def leaf_layer(self):
        return self.layers[-1]

    @property
    def layers(self):
        """ The list of Schema1Layer for this manifest, parsed once and then cached. """
        if self._layers is None:
            self._layers = list(self._generate_layers())
        return self._layers

    def _generate_layers(self):
        """
        Returns a generator of objects that have the blobSum and v1Compatibility keys in them,
        starting from the base image and working toward the leaf node.

        Raises MalformedSchema1Manifest if a blobSum cannot be parsed as a digest or if a
        v1Compatibility document has no `id` field.
        """
        # zip() is materialized so reversed() also works where zip() returns an iterator.
        paired = list(zip(self._parsed[DOCKER_SCHEMA1_FS_LAYERS_KEY],
                          self._parsed[DOCKER_SCHEMA1_HISTORY_KEY]))
        for blob_sum_obj, history_obj in reversed(paired):
            try:
                image_digest = digest_tools.Digest.parse_digest(
                    blob_sum_obj[DOCKER_SCHEMA1_BLOB_SUM_KEY])
            except digest_tools.InvalidDigestException:
                raise MalformedSchema1Manifest('could not parse manifest digest: %s' %
                                               blob_sum_obj[DOCKER_SCHEMA1_BLOB_SUM_KEY])

            metadata_string = history_obj[DOCKER_SCHEMA1_V1_COMPAT_KEY]

            v1_metadata = json.loads(metadata_string)
            command_list = v1_metadata.get('container_config', {}).get('Cmd', None)
            command = json.dumps(command_list) if command_list else None

            if 'id' not in v1_metadata:
                raise MalformedSchema1Manifest('id field missing from v1Compatibility JSON')

            extracted = Schema1V1Metadata(v1_metadata['id'], v1_metadata.get('parent'),
                                          v1_metadata.get('created'), v1_metadata.get('comment'),
                                          command)
            yield Schema1Layer(image_digest, extracted, metadata_string)

    @property
    def payload(self):
        """
        The signed content of the manifest: the first `formatLength` bytes of the serialized
        manifest followed by the decoded `formatTail`, both taken from the protected header of
        the first signature.
        """
        protected = str(self._signatures[0][DOCKER_SCHEMA1_PROTECTED_KEY])
        parsed_protected = json.loads(base64url_decode(protected))
        signed_content_head = self._bytes[:parsed_protected[DOCKER_SCHEMA1_FORMAT_LENGTH_KEY]]
        signed_content_tail = base64url_decode(
            str(parsed_protected[DOCKER_SCHEMA1_FORMAT_TAIL_KEY]))
        return signed_content_head + signed_content_tail

    def rewrite_invalid_image_ids(self, images_map):
        """
        Rewrites Docker v1 image IDs and returns a generator of DockerV1Metadata.

        If Docker gives us a layer with a v1 image ID that already points to existing
        content, but the checksums don't match, then we need to rewrite the image ID
        to something new in order to ensure consistency.

        `images_map` is looked up by v1 image ID; its values must expose `content_checksum`.
        Raises MalformedSchema1Manifest if a layer names a parent that is not in `images_map`.
        """
        # used to synthesize a new "content addressable" image id
        digest_history = hashlib.sha256()
        has_rewritten_ids = False
        updated_id_map = {}

        for layer in self.layers:
            digest_str = str(layer.digest)
            extracted_v1_metadata = layer.v1_metadata
            working_image_id = extracted_v1_metadata.image_id

            # Update our digest_history hash for the new layer data.
            digest_history.update(digest_str)
            digest_history.update("@")
            digest_history.update(layer.raw_v1_metadata.encode('utf-8'))
            digest_history.update("|")

            # Ensure that the v1 image's storage matches the V2 blob. If not, we've
            # found a data inconsistency and need to create a new layer ID for the V1
            # image, and all images that follow it in the ancestry chain.
            digest_mismatch = (
                extracted_v1_metadata.image_id in images_map and
                images_map[extracted_v1_metadata.image_id].content_checksum != digest_str)
            if digest_mismatch or has_rewritten_ids:
                working_image_id = digest_history.hexdigest()
                has_rewritten_ids = True

            # Store the new docker id in the map
            updated_id_map[extracted_v1_metadata.image_id] = working_image_id

            # Lookup the parent image for the layer, if any.
            # NOTE(review): this stores the value found in images_map (not an ID string) as the
            # parent, and rejects parents that exist only as earlier layers of this manifest --
            # confirm against the caller that both are intended.
            parent_image_id = None
            if extracted_v1_metadata.parent_image_id is not None:
                parent_image_id = images_map.get(extracted_v1_metadata.parent_image_id, None)
                if parent_image_id is None:
                    raise MalformedSchema1Manifest('parent not found with image ID: %s' %
                                                   extracted_v1_metadata.parent_image_id)

            # Synthesize and store the v1 metadata in the db.
            v1_metadata_json = layer.raw_v1_metadata
            if has_rewritten_ids:
                v1_metadata_json = _updated_v1_metadata(v1_metadata_json, updated_id_map)

            # DockerV1Metadata is a namedtuple without defaults, so every field must be given.
            yield DockerV1Metadata(
                namespace_name=self.namespace,
                repo_name=self.repo_name,
                image_id=working_image_id,
                # NOTE(review): no v1 checksum is available here; confirm None is acceptable.
                checksum=None,
                content_checksum=digest_str,
                created=extracted_v1_metadata.created,
                comment=extracted_v1_metadata.comment,
                command=extracted_v1_metadata.command,
                parent_image_id=parent_image_id,
                compat_json=v1_metadata_json,
            )
class DockerSchema1ManifestBuilder(object):
    """
    Helper that accumulates layers and then produces a signed DockerSchema1Manifest.
    """

    def __init__(self, namespace_name, repo_name, tag, architecture='amd64'):
        """
        Starts a manifest for the given repository and tag. An empty namespace yields a bare
        repository name; otherwise the name is `namespace/repo`.
        """
        if namespace_name == '':
            full_name = repo_name
        else:
            full_name = '{0}/{1}'.format(namespace_name, repo_name)

        self._base_payload = {
            DOCKER_SCHEMA1_REPO_TAG_KEY: tag,
            DOCKER_SCHEMA1_REPO_NAME_KEY: full_name,
            DOCKER_SCHEMA1_ARCH_KEY: architecture,
            DOCKER_SCHEMA1_SCHEMA_VER_KEY: 1,
        }

        self._fs_layer_digests = []
        self._history = []

    def add_layer(self, layer_digest, v1_json_metadata):
        """
        Appends a layer (its blob digest plus its v1 compatibility JSON) and returns the builder
        so that calls can be chained.
        """
        self._fs_layer_digests.append({DOCKER_SCHEMA1_BLOB_SUM_KEY: layer_digest})
        self._history.append({DOCKER_SCHEMA1_V1_COMPAT_KEY: v1_json_metadata})
        return self

    def build(self, json_web_key):
        """
        Signs the accumulated payload with `json_web_key` and returns the resulting
        DockerSchema1Manifest.
        """
        payload = OrderedDict(self._base_payload)
        payload.update({
            DOCKER_SCHEMA1_HISTORY_KEY: self._history,
            DOCKER_SCHEMA1_FS_LAYERS_KEY: self._fs_layer_digests,
        })

        serialized = json.dumps(payload, indent=3)

        # Everything from the final closing brace onwards is the "tail"; the signatures are
        # later inserted before it.
        tail_start = serialized.rfind('\n}')
        encoded_tail = base64url_encode(serialized[tail_start:])
        signing_time = datetime.utcnow().strftime(_ISO_DATETIME_FORMAT_ZULU)

        protected_payload = {
            DOCKER_SCHEMA1_FORMAT_TAIL_KEY: encoded_tail,
            DOCKER_SCHEMA1_FORMAT_LENGTH_KEY: tail_start,
            'time': signing_time,
        }
        protected = base64url_encode(json.dumps(protected_payload))
        logger.debug('Generated protected block: %s', protected)

        bytes_to_sign = '{0}.{1}'.format(protected, base64url_encode(serialized))

        signer = SIGNER_ALGS[_JWS_SIGNING_ALGORITHM]
        raw_signature = signer.sign(bytes_to_sign, json_web_key.get_key())
        signature = base64url_encode(raw_signature)
        logger.debug('Generated signature: %s', signature)

        # Only the public members of the key are embedded in the manifest.
        public_members = set(json_web_key.public_members)
        public_key = {}
        for comp, value in json_web_key.to_dict().items():
            if comp in public_members:
                public_key[comp] = value

        signature_block = {
            DOCKER_SCHEMA1_HEADER_KEY: {'jwk': public_key, 'alg': _JWS_SIGNING_ALGORITHM},
            DOCKER_SCHEMA1_SIGNATURE_KEY: signature,
            DOCKER_SCHEMA1_PROTECTED_KEY: protected,
        }

        logger.debug('Encoded signature block: %s', json.dumps(signature_block))

        payload.update({DOCKER_SCHEMA1_SIGNATURES_KEY: [signature_block]})

        return DockerSchema1Manifest(json.dumps(payload, indent=3))
def _updated_v1_metadata(v1_metadata_json, updated_id_map):
"""
Updates v1_metadata with new image IDs.
"""
parsed = json.loads(v1_metadata_json)
parsed['id'] = updated_id_map[parsed['id']]
if parsed.get('parent') and parsed['parent'] in updated_id_map:
parsed['parent'] = updated_id_map[parsed['parent']]
if parsed.get('container_config', {}).get('Image'):
existing_image = parsed['container_config']['Image']
if existing_image in updated_id_map:
parsed['container_config']['image'] = updated_id_map[existing_image]
return json.dumps(parsed)

11
image/docker/schema2.py Normal file
View file

@ -0,0 +1,11 @@
"""
schema2 implements pure data transformations according to the Docker Manifest v2.2 Specification.
https://github.com/docker/distribution/blob/master/docs/spec/manifest-v2-2.md
"""
# Content Types
# Media types of a single schema 2 manifest and of a schema 2 manifest list.
DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v2+json'
DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.list.v2+json'
DOCKER_SCHEMA2_CONTENT_TYPES = [DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE,
                                DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE]

128
image/docker/squashed.py Normal file
View file

@ -0,0 +1,128 @@
import copy
import json
import math
import calendar
from app import app
from image import TarImageFormatter
from util.registry.gzipwrap import GZIP_BUFFER_SIZE
from util.registry.streamlayerformat import StreamLayerMerger
class FileEstimationException(Exception):
    """
    Error raised while streaming a squashed image when the merged layer data turns out to be
    larger than the size estimated for it. The tar header carrying the estimate has already been
    sent at that point, so the stream is wrong and has to fail.
    """
class SquashedDockerImageFormatter(TarImageFormatter):
    """
    Formatter that emits a single squashed image in the tar layout read by `docker load`.
    """

    # Factor applied to the image size reported by Docker to leave room for the tar metadata.
    # Note: it was never formally derived and should be tuned if/when it causes problems. It
    # cannot be made too large, or the Docker daemon dies when trying to load the entire tar into
    # memory.
    SIZE_MULTIPLIER = 1.2

    def stream_generator(self, namespace, repository, tag, synthetic_image_id,
                         layer_json, get_image_iterator, get_layer_iterator, get_image_json):
        """
        Yields the pieces of the squashed image tar, in order: the `repositories` file, the
        image folder, its `json` and `VERSION` files, the `layer.tar` header (sized from an
        estimate), the merged layer data, zero fill up to the estimate, padding and the two
        empty trailing records.

        Raises FileEstimationException if the merged layer data is larger than the estimate.
        """
        first_created = next(get_image_iterator()).created
        if first_created is None:
            image_mtime = 0
        else:
            image_mtime = calendar.timegm(first_created.utctimetuple())

        # Docker import V1 Format (.tar):
        #  repositories - JSON file containing a repo -> tag -> image map
        #  {image ID folder}:
        #     json - The layer JSON
        #     layer.tar - The tared contents of the layer
        #     VERSION - The docker import version: '1.0'
        merger = StreamLayerMerger(get_layer_iterator)

        # The repositories file maps the full repository name to its single synthetic tag.
        squash_tags = {tag + '.squash': synthetic_image_id}
        hostname = app.config['SERVER_HOSTNAME']
        repo_map = {hostname + '/' + namespace + '/' + repository: squash_tags}
        yield self.tar_file('repositories', json.dumps(repo_map), mtime=image_mtime)

        # The folder named after the synthetic image.
        yield self.tar_folder(synthetic_image_id, mtime=image_mtime)

        # The layer JSON, rewritten to describe the synthetic image.
        layer_json = SquashedDockerImageFormatter._build_layer_json(layer_json,
                                                                    synthetic_image_id)
        yield self.tar_file(synthetic_image_id + '/json', json.dumps(layer_json),
                            mtime=image_mtime)

        # The VERSION file.
        yield self.tar_file(synthetic_image_id + '/VERSION', '1.0', mtime=image_mtime)

        # Estimate the size of the merged layer so that its header can be sent up front.
        size_estimate = 0
        for image in get_image_iterator():
            # In V1 we have the actual uncompressed size, which is needed for back compat with
            # older versions of Docker. In V2, we use the size given in the image JSON.
            known_size = image.storage.uncompressed_size
            if known_size:
                size_estimate += known_size
                continue

            reported_size = get_image_json(image).get('Size', 0)
            size_estimate += reported_size * SquashedDockerImageFormatter.SIZE_MULTIPLIER

        # The header needs a whole number of bytes.
        size_estimate = int(math.ceil(size_estimate))

        yield self.tar_file_header(synthetic_image_id + '/layer.tar', size_estimate,
                                   mtime=image_mtime)

        # Stream the merged layer, keeping track of how much was actually sent.
        bytes_sent = 0
        for entry in merger.get_generator():
            yield entry
            bytes_sent += len(entry)

        # Sending more than the estimate is unlikely but possible; the header already sent is
        # then wrong, so the stream has to fail.
        if bytes_sent > size_estimate:
            raise FileEstimationException("Expected %s bytes, found %s bytes" %
                                          (size_estimate, bytes_sent))

        # Sending less than the estimate is the likely case; make up the difference with zeros,
        # in chunks of at most GZIP_BUFFER_SIZE.
        remaining = size_estimate - bytes_sent
        while remaining > 0:
            chunk_size = min(remaining, GZIP_BUFFER_SIZE)
            yield '\0' * chunk_size
            remaining -= chunk_size

        # Pad the file out to a multiple of 512 bytes where necessary.
        yield self.tar_file_padding(size_estimate)

        # The tar spec ends an archive with two empty records.
        for _ in range(2):
            yield '\0' * 512

    @staticmethod
    def _build_layer_json(layer_json, synthetic_image_id):
        """
        Returns a deep copy of `layer_json` whose `id` is the synthetic image ID, with any
        `parent` removed and any existing `Image` entry under `config` or `container_config`
        pointed at the synthetic image ID.
        """
        squashed = copy.deepcopy(layer_json)
        squashed['id'] = synthetic_image_id
        squashed.pop('parent', None)

        for section in ('config', 'container_config'):
            if section in squashed and 'Image' in squashed[section]:
                squashed[section]['Image'] = synthetic_image_id

        return squashed

16
image/docker/v1.py Normal file
View file

@ -0,0 +1,16 @@
"""
v1 implements pure data transformations according to the Docker Image Specification v1.1.
https://github.com/docker/docker/blob/master/image/spec/v1.1.md
"""
from collections import namedtuple
class DockerV1Metadata(namedtuple('DockerV1Metadata', [
        'namespace_name',
        'repo_name',
        'image_id',
        'checksum',
        'content_checksum',
        'created',
        'comment',
        'command',
        'parent_image_id',
        'compat_json',
])):
    """
    DockerV1Metadata is an immutable record with all of the metadata for a given Docker v1 Image.

    The metadata in its original form is kept in the compat_json field.
    """