mv data/types image

This change also merges formats into the new image module.
Jimmy Zelinskie 2016-08-02 18:45:30 -04:00
parent a516c08deb
commit 32a6c22b43
14 changed files with 342 additions and 258 deletions

image/docker/__init__.py Normal file

@@ -0,0 +1,10 @@
"""
docker implements pure data transformations according to the many Docker specifications.
"""
class DockerException(Exception):
pass
class ManifestException(DockerException):
pass
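
For context, a minimal sketch of how callers elsewhere in the registry might lean on this hierarchy to treat all manifest-parsing failures uniformly; the parse_manifest_or_none helper is illustrative, not part of the module:

from image.docker import ManifestException
from image.docker.schema1 import DockerSchema1Manifest

def parse_manifest_or_none(manifest_bytes):
  # Schema-specific failures (malformed manifests, bad signatures) all derive
  # from ManifestException, so a single handler covers every manifest format.
  try:
    return DockerSchema1Manifest(manifest_bytes)
  except ManifestException:
    return None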

image/docker/schema1.py Normal file

@@ -0,0 +1,359 @@
"""
schema1 implements pure data transformations according to the Docker Manifest v2.1 Specification.
https://github.com/docker/distribution/blob/master/docs/spec/manifest-v2-1.md
"""
import hashlib
import json
import logging
from collections import namedtuple, OrderedDict
from datetime import datetime
from jwkest.jws import SIGNER_ALGS, keyrep
from jwt.utils import base64url_encode, base64url_decode
from digest import digest_tools
from image.docker import ManifestException
from image.docker.v1 import DockerV1Metadata
logger = logging.getLogger(__name__)
# Content Types
DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v1+json'
DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v1+prettyjws'
DOCKER_SCHEMA1_CONTENT_TYPES = [DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE,
                                DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE]
# Keys for signature-related data
DOCKER_SCHEMA1_SIGNATURES_KEY = 'signatures'
DOCKER_SCHEMA1_HEADER_KEY = 'header'
DOCKER_SCHEMA1_SIGNATURE_KEY = 'signature'
DOCKER_SCHEMA1_PROTECTED_KEY = 'protected'
DOCKER_SCHEMA1_FORMAT_LENGTH_KEY = 'formatLength'
DOCKER_SCHEMA1_FORMAT_TAIL_KEY = 'formatTail'
# Keys for manifest-related data
DOCKER_SCHEMA1_REPO_NAME_KEY = 'name'
DOCKER_SCHEMA1_REPO_TAG_KEY = 'tag'
DOCKER_SCHEMA1_ARCH_KEY = 'architecture'
DOCKER_SCHEMA1_FS_LAYERS_KEY = 'fsLayers'
DOCKER_SCHEMA1_BLOB_SUM_KEY = 'blobSum'
DOCKER_SCHEMA1_HISTORY_KEY = 'history'
DOCKER_SCHEMA1_V1_COMPAT_KEY = 'v1Compatibility'
DOCKER_SCHEMA1_SCHEMA_VER_KEY = 'schemaVersion'
# Format for time used in the protected payload.
_ISO_DATETIME_FORMAT_ZULU = '%Y-%m-%dT%H:%M:%SZ'
# The algorithm we use to sign the JWS.
_JWS_SIGNING_ALGORITHM = 'RS256'
class MalformedSchema1Manifest(ManifestException):
  """
  Raised when a manifest fails an assertion that should be true according to the Docker Manifest
  v2.1 Specification.
  """
  pass


class InvalidSchema1Signature(ManifestException):
  """
  Raised when there is a failure verifying the signature of a signed Docker Manifest v2.1.
  """
  pass


class Schema1Layer(namedtuple('Schema1Layer', ['digest', 'v1_metadata', 'raw_v1_metadata'])):
  """
  Represents all of the data about an individual layer in a given Manifest.
  This is the union of the fsLayers (digest) and the history entries (v1Compatibility).
  """


class Schema1V1Metadata(namedtuple('Schema1V1Metadata', ['image_id', 'parent_image_id', 'created',
                                                         'comment', 'command'])):
  """
  Represents the necessary data extracted from the v1 compatibility string in a given layer of a
  Manifest.
  """
class DockerSchema1Manifest(object):
  def __init__(self, manifest_bytes, validate=True):
    self._layers = None
    self._bytes = manifest_bytes

    self._parsed = json.loads(manifest_bytes)
    self._signatures = self._parsed[DOCKER_SCHEMA1_SIGNATURES_KEY]
    self._tag = self._parsed[DOCKER_SCHEMA1_REPO_TAG_KEY]

    repo_name = self._parsed[DOCKER_SCHEMA1_REPO_NAME_KEY]
    repo_name_tuple = repo_name.split('/')
    if len(repo_name_tuple) > 1:
      self._namespace, self._repo_name = repo_name_tuple
    elif len(repo_name_tuple) == 1:
      self._namespace = ''
      self._repo_name = repo_name_tuple[0]
    else:
      raise MalformedSchema1Manifest('malformed repository name: %s' % repo_name)

    if validate:
      self._validate()

  def _validate(self):
    for signature in self._signatures:
      bytes_to_verify = '{0}.{1}'.format(signature['protected'],
                                         base64url_encode(self.payload))
      signer = SIGNER_ALGS[signature['header']['alg']]
      key = keyrep(signature['header']['jwk'])
      gk = key.get_key()
      sig = base64url_decode(signature['signature'].encode('utf-8'))
      verified = signer.verify(bytes_to_verify, sig, gk)
      if not verified:
        raise InvalidSchema1Signature()

  @property
  def content_type(self):
    return DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE

  @property
  def signatures(self):
    return self._signatures

  @property
  def namespace(self):
    return self._namespace

  @property
  def repo_name(self):
    return self._repo_name

  @property
  def tag(self):
    return self._tag

  @property
  def bytes(self):
    return self._bytes

  @property
  def manifest_json(self):
    return self._parsed

  @property
  def digest(self):
    return digest_tools.sha256_digest(self.payload)

  @property
  def image_ids(self):
    return {mdata.v1_metadata.image_id for mdata in self.layers}

  @property
  def parent_image_ids(self):
    return {mdata.v1_metadata.parent_image_id for mdata in self.layers
            if mdata.v1_metadata.parent_image_id}

  @property
  def checksums(self):
    return list({str(mdata.digest) for mdata in self.layers})

  @property
  def leaf_layer(self):
    return self.layers[-1]

  @property
  def layers(self):
    if self._layers is None:
      self._layers = list(self._generate_layers())
    return self._layers

  def _generate_layers(self):
    """
    Returns a generator of Schema1Layer objects built from the paired fsLayers (blobSum) and
    history (v1Compatibility) entries, starting from the base image and working toward the leaf.
    """
    for blob_sum_obj, history_obj in reversed(zip(self._parsed[DOCKER_SCHEMA1_FS_LAYERS_KEY],
                                                  self._parsed[DOCKER_SCHEMA1_HISTORY_KEY])):
      try:
        image_digest = digest_tools.Digest.parse_digest(blob_sum_obj[DOCKER_SCHEMA1_BLOB_SUM_KEY])
      except digest_tools.InvalidDigestException:
        raise MalformedSchema1Manifest('could not parse manifest digest: %s' %
                                       blob_sum_obj[DOCKER_SCHEMA1_BLOB_SUM_KEY])

      metadata_string = history_obj[DOCKER_SCHEMA1_V1_COMPAT_KEY]

      v1_metadata = json.loads(metadata_string)
      command_list = v1_metadata.get('container_config', {}).get('Cmd', None)
      command = json.dumps(command_list) if command_list else None

      if 'id' not in v1_metadata:
        raise MalformedSchema1Manifest('id field missing from v1Compatibility JSON')

      extracted = Schema1V1Metadata(v1_metadata['id'], v1_metadata.get('parent'),
                                    v1_metadata.get('created'), v1_metadata.get('comment'),
                                    command)
      yield Schema1Layer(image_digest, extracted, metadata_string)

  @property
  def payload(self):
    protected = str(self._signatures[0][DOCKER_SCHEMA1_PROTECTED_KEY])
    parsed_protected = json.loads(base64url_decode(protected))

    signed_content_head = self._bytes[:parsed_protected[DOCKER_SCHEMA1_FORMAT_LENGTH_KEY]]
    signed_content_tail = base64url_decode(str(parsed_protected[DOCKER_SCHEMA1_FORMAT_TAIL_KEY]))
    return signed_content_head + signed_content_tail
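  # Worked example: the builder below sets formatLength to the offset of the final '\n}'
  # in the pretty-printed payload and formatTail to the base64url encoding of that suffix,
  # so signed_content_head + signed_content_tail reproduces the exact bytes that were
  # signed, independent of how the surrounding JSON is later reserialized.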
  def rewrite_invalid_image_ids(self, images_map):
    """
    Rewrites Docker v1 image IDs and returns a generator of DockerV1Metadata.

    If Docker gives us a layer with a v1 image ID that already points to existing
    content, but the checksums don't match, then we need to rewrite the image ID
    to something new in order to ensure consistency.
    """
    # Used to synthesize a new "content-addressable" image ID.
    digest_history = hashlib.sha256()

    has_rewritten_ids = False
    updated_id_map = {}
    for layer in self.layers:
      digest_str = str(layer.digest)
      extracted_v1_metadata = layer.v1_metadata
      working_image_id = extracted_v1_metadata.image_id

      # Update our digest_history hash for the new layer data.
      digest_history.update(digest_str)
      digest_history.update("@")
      digest_history.update(layer.raw_v1_metadata.encode('utf-8'))
      digest_history.update("|")

      # Ensure that the v1 image's storage matches the V2 blob. If not, we've
      # found a data inconsistency and need to create a new layer ID for the V1
      # image, and all images that follow it in the ancestry chain.
      digest_mismatch = (extracted_v1_metadata.image_id in images_map and
                         images_map[extracted_v1_metadata.image_id].content_checksum != digest_str)
      if digest_mismatch or has_rewritten_ids:
        working_image_id = digest_history.hexdigest()
        has_rewritten_ids = True

      # Store the new docker ID in the map.
      updated_id_map[extracted_v1_metadata.image_id] = working_image_id

      # Lookup the parent image for the layer, if any, and point at its (possibly
      # rewritten) ID so the yielded metadata agrees with the updated compat JSON.
      parent_image_id = None
      if extracted_v1_metadata.parent_image_id is not None:
        if extracted_v1_metadata.parent_image_id not in images_map:
          raise MalformedSchema1Manifest('parent not found with image ID: %s' %
                                         extracted_v1_metadata.parent_image_id)
        parent_image_id = updated_id_map.get(extracted_v1_metadata.parent_image_id,
                                             extracted_v1_metadata.parent_image_id)

      # Synthesize and store the v1 metadata in the db.
      v1_metadata_json = layer.raw_v1_metadata
      if has_rewritten_ids:
        v1_metadata_json = _updated_v1_metadata(v1_metadata_json, updated_id_map)

      yield DockerV1Metadata(
        namespace_name=self.namespace,
        repo_name=self.repo_name,
        image_id=working_image_id,
        checksum=None,  # The v1 checksum is not known at this layer of abstraction.
        content_checksum=digest_str,
        created=extracted_v1_metadata.created,
        comment=extracted_v1_metadata.comment,
        command=extracted_v1_metadata.command,
        compat_json=v1_metadata_json,
        parent_image_id=parent_image_id,
      )
class DockerSchema1ManifestBuilder(object):
  """
  A convenient abstraction around creating new DockerSchema1Manifests.
  """
  def __init__(self, namespace_name, repo_name, tag, architecture='amd64'):
    repo_name_key = '{0}/{1}'.format(namespace_name, repo_name)
    if namespace_name == '':
      repo_name_key = repo_name

    self._base_payload = {
      DOCKER_SCHEMA1_REPO_TAG_KEY: tag,
      DOCKER_SCHEMA1_REPO_NAME_KEY: repo_name_key,
      DOCKER_SCHEMA1_ARCH_KEY: architecture,
      DOCKER_SCHEMA1_SCHEMA_VER_KEY: 1,
    }

    self._fs_layer_digests = []
    self._history = []

  def add_layer(self, layer_digest, v1_json_metadata):
    self._fs_layer_digests.append({
      DOCKER_SCHEMA1_BLOB_SUM_KEY: layer_digest,
    })
    self._history.append({
      DOCKER_SCHEMA1_V1_COMPAT_KEY: v1_json_metadata,
    })
    return self

  def build(self, json_web_key):
    """
    Builds a DockerSchema1Manifest object complete with signature.
    """
    payload = OrderedDict(self._base_payload)
    payload.update({
      DOCKER_SCHEMA1_HISTORY_KEY: self._history,
      DOCKER_SCHEMA1_FS_LAYERS_KEY: self._fs_layer_digests,
    })

    payload_str = json.dumps(payload, indent=3)

    split_point = payload_str.rfind('\n}')
    protected_payload = {
      'formatTail': base64url_encode(payload_str[split_point:]),
      'formatLength': split_point,
      'time': datetime.utcnow().strftime(_ISO_DATETIME_FORMAT_ZULU),
    }
    protected = base64url_encode(json.dumps(protected_payload))
    logger.debug('Generated protected block: %s', protected)

    bytes_to_sign = '{0}.{1}'.format(protected, base64url_encode(payload_str))

    signer = SIGNER_ALGS[_JWS_SIGNING_ALGORITHM]
    signature = base64url_encode(signer.sign(bytes_to_sign, json_web_key.get_key()))
    logger.debug('Generated signature: %s', signature)

    public_members = set(json_web_key.public_members)
    public_key = {comp: value for comp, value in json_web_key.to_dict().items()
                  if comp in public_members}

    signature_block = {
      DOCKER_SCHEMA1_HEADER_KEY: {'jwk': public_key, 'alg': _JWS_SIGNING_ALGORITHM},
      DOCKER_SCHEMA1_SIGNATURE_KEY: signature,
      DOCKER_SCHEMA1_PROTECTED_KEY: protected,
    }

    logger.debug('Encoded signature block: %s', json.dumps(signature_block))
    payload.update({DOCKER_SCHEMA1_SIGNATURES_KEY: [signature_block]})
    return DockerSchema1Manifest(json.dumps(payload, indent=3))
def _updated_v1_metadata(v1_metadata_json, updated_id_map):
  """
  Updates v1_metadata with new image IDs.
  """
  parsed = json.loads(v1_metadata_json)
  parsed['id'] = updated_id_map[parsed['id']]

  if parsed.get('parent') and parsed['parent'] in updated_id_map:
    parsed['parent'] = updated_id_map[parsed['parent']]

  if parsed.get('container_config', {}).get('Image'):
    existing_image = parsed['container_config']['Image']
    if existing_image in updated_id_map:
      parsed['container_config']['Image'] = updated_id_map[existing_image]

  return json.dumps(parsed)
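
To illustrate the round trip, a sketch of driving the builder end to end, assuming pycrypto and jwkest are available as the module's imports suggest; the key, digests, and image IDs below are illustrative only:

import json

from Crypto.PublicKey import RSA
from jwkest.jwk import RSAKey

from image.docker.schema1 import DockerSchema1ManifestBuilder

# Hypothetical signing key; any jwkest key exposing get_key() and public_members works.
signing_key = RSAKey(key=RSA.generate(2048), kid='sample-kid')

builder = DockerSchema1ManifestBuilder('devtable', 'simple', 'latest')
# Layers are added leaf-first, mirroring the ordering of the fsLayers/history arrays.
builder.add_layer('sha256:' + 'ab' * 32, json.dumps({'id': 'leafid', 'parent': 'baseid'}))
builder.add_layer('sha256:' + 'cd' * 32, json.dumps({'id': 'baseid'}))

# build() signs the payload; the DockerSchema1Manifest constructor then re-verifies
# the JWS via _validate(), so a successful return implies a valid signature.
manifest = builder.build(signing_key)
assert manifest.namespace == 'devtable' and len(manifest.layers) == 2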

image/docker/schema2.py Normal file

@@ -0,0 +1,11 @@
"""
schema2 implements pure data transformations according to the Docker Manifest v2.2 Specification.
https://github.com/docker/distribution/blob/master/docs/spec/manifest-v2-2.md
"""
# Content Types
DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v2+json'
DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.list.v2+json'
DOCKER_SCHEMA2_CONTENT_TYPES = [DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE,
                                DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE]
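
These constants can drive content negotiation in registry endpoints; a minimal sketch of such negotiation, with a hypothetical helper name and the schema 1 fallback imported from the sibling module:

from image.docker.schema1 import DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE
from image.docker.schema2 import DOCKER_SCHEMA2_CONTENT_TYPES

def negotiate_manifest_content_type(accept_header):
  # Serve a schema 2 manifest (or manifest list) only when the client asks for it;
  # otherwise fall back to the signed schema 1 type that all clients accept.
  accepted = [media_type.strip() for media_type in accept_header.split(',')]
  for media_type in accepted:
    if media_type in DOCKER_SCHEMA2_CONTENT_TYPES:
      return media_type
  return DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE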

image/docker/squashed.py Normal file

@@ -0,0 +1,128 @@
import copy
import json
import math
import calendar
from app import app
from image import TarImageFormatter
from util.registry.gzipwrap import GZIP_BUFFER_SIZE
from util.registry.streamlayerformat import StreamLayerMerger
class FileEstimationException(Exception):
  """
  Exception raised by build_docker_load_stream if the estimated size of the layer tar was lower
  than the actual size. This means the sent tar header is wrong, and we have to fail.
  """
  pass


class SquashedDockerImageFormatter(TarImageFormatter):
  """
  Image formatter which produces a squashed image compatible with the `docker load` command.
  """

  # Multiplier against the image size reported by Docker to account for the tar metadata.
  # Note: This multiplier was not formally calculated in any way and should be adjusted over time
  # if/when we encounter issues with it. Unfortunately, we cannot make it too large or the Docker
  # daemon dies when trying to load the entire tar into memory.
  SIZE_MULTIPLIER = 1.2
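  # For example, a V2 layer whose image JSON reports a Size of 100 MiB contributes
  # ceil(100 MiB * 1.2) bytes to the estimated layer.tar header below; any surplus in
  # the estimate is later padded with zero bytes so the header and stream stay consistent.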
  def stream_generator(self, namespace, repository, tag, synthetic_image_id,
                       layer_json, get_image_iterator, get_layer_iterator, get_image_json):
    image_mtime = 0
    created = next(get_image_iterator()).created
    if created is not None:
      image_mtime = calendar.timegm(created.utctimetuple())

    # Docker import V1 Format (.tar):
    #   repositories - JSON file containing a repo -> tag -> image map
    #   {image ID folder}:
    #     json - The layer JSON
    #     layer.tar - The tarred contents of the layer
    #     VERSION - The docker import version: '1.0'
    layer_merger = StreamLayerMerger(get_layer_iterator)

    # Yield the repositories file:
    synthetic_layer_info = {}
    synthetic_layer_info[tag + '.squash'] = synthetic_image_id

    hostname = app.config['SERVER_HOSTNAME']
    repositories = {}
    repositories[hostname + '/' + namespace + '/' + repository] = synthetic_layer_info

    yield self.tar_file('repositories', json.dumps(repositories), mtime=image_mtime)

    # Yield the image ID folder.
    yield self.tar_folder(synthetic_image_id, mtime=image_mtime)

    # Yield the JSON layer data.
    layer_json = SquashedDockerImageFormatter._build_layer_json(layer_json, synthetic_image_id)
    yield self.tar_file(synthetic_image_id + '/json', json.dumps(layer_json), mtime=image_mtime)

    # Yield the VERSION file.
    yield self.tar_file(synthetic_image_id + '/VERSION', '1.0', mtime=image_mtime)

    # Yield the merged layer data's header.
    estimated_file_size = 0
    for image in get_image_iterator():
      # In V1 we have the actual uncompressed size, which is needed for backwards compatibility
      # with older versions of Docker.
      # In V2, we use the size given in the image JSON.
      if image.storage.uncompressed_size:
        estimated_file_size += image.storage.uncompressed_size
      else:
        image_json = get_image_json(image)
        estimated_file_size += image_json.get('Size', 0) * SquashedDockerImageFormatter.SIZE_MULTIPLIER

    # Make sure the estimated file size is an integer number of bytes.
    estimated_file_size = int(math.ceil(estimated_file_size))

    yield self.tar_file_header(synthetic_image_id + '/layer.tar', estimated_file_size,
                               mtime=image_mtime)

    # Yield the contents of the merged layer.
    yielded_size = 0
    for entry in layer_merger.get_generator():
      yield entry
      yielded_size += len(entry)

    # If the yielded size is more than the estimated size (which is unlikely but possible), then
    # raise an exception since the tar header will be wrong.
    if yielded_size > estimated_file_size:
      message = "Expected %s bytes, found %s bytes" % (estimated_file_size, yielded_size)
      raise FileEstimationException(message)

    # If the yielded size is less than the estimated size (which is likely), fill the rest with
    # zeros.
    if yielded_size < estimated_file_size:
      to_yield = estimated_file_size - yielded_size
      while to_yield > 0:
        yielded = min(to_yield, GZIP_BUFFER_SIZE)
        yield '\0' * yielded
        to_yield -= yielded

    # Yield any file padding to 512 bytes that is necessary.
    yield self.tar_file_padding(estimated_file_size)

    # Last two records are empty in the tar spec.
    yield '\0' * 512
    yield '\0' * 512

  @staticmethod
  def _build_layer_json(layer_json, synthetic_image_id):
    updated_json = copy.deepcopy(layer_json)
    updated_json['id'] = synthetic_image_id

    if 'parent' in updated_json:
      del updated_json['parent']

    if 'config' in updated_json and 'Image' in updated_json['config']:
      updated_json['config']['Image'] = synthetic_image_id

    if 'container_config' in updated_json and 'Image' in updated_json['container_config']:
      updated_json['container_config']['Image'] = synthetic_image_id

    return updated_json
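
As a concrete illustration of the repositories entry yielded above, with SERVER_HOSTNAME set to quay.io and an illustrative namespace, repository, tag, and synthetic image ID, the emitted JSON decodes to the equivalent of:

repositories = {
  'quay.io/devtable/simple': {
    # tag + '.squash' maps to the synthetic image ID folder in the tar.
    'latest.squash': 'ab' * 32,
  },
}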

image/docker/v1.py Normal file

@@ -0,0 +1,16 @@
"""
v1 implements pure data transformations according to the Docker Image Specification v1.1.
https://github.com/docker/docker/blob/master/image/spec/v1.1.md
"""
from collections import namedtuple
class DockerV1Metadata(namedtuple('DockerV1Metadata',
                                  ['namespace_name', 'repo_name', 'image_id', 'checksum',
                                   'content_checksum', 'created', 'comment', 'command',
                                   'parent_image_id', 'compat_json'])):
  """
  DockerV1Metadata represents all of the metadata for a given Docker v1 Image.
  The original form of the metadata is stored in the compat_json field.
  """