From 5b630ebdb0c720291c9af2d08cb5afe6f1341346 Mon Sep 17 00:00:00 2001 From: Jimmy Zelinskie Date: Mon, 25 Jul 2016 18:56:25 -0400 Subject: [PATCH] v2/manifest: refactor to use types --- data/model/v2.py | 102 ++++++++ data/types.py | 346 ++++++++++++++++++++++++++ endpoints/v2/manifest.py | 513 ++++++++------------------------------- 3 files changed, 553 insertions(+), 408 deletions(-) create mode 100644 data/model/v2.py create mode 100644 data/types.py diff --git a/data/model/v2.py b/data/model/v2.py new file mode 100644 index 000000000..b677b462e --- /dev/null +++ b/data/model/v2.py @@ -0,0 +1,102 @@ +from data.types import ( + Repository, + Tag, + ManifestJSON, + DockerV1Metadata, +) + +def get_repository(namespace_name, repo_name): + repo = model.repository.get_repository(namespace_name, repo_name) + if repo is None: + return None + + return Repository( + id=repo.id, + name=repo.name, + namespace_name=repo.namespace_user.username, + ) + + +def get_active_tag(namespace_name, repo_name, tag_name): + try: + return model.tag.get_active_tag(namespace_name, repo_name, tag_name) + except RepositoryTag.DoesNotExist: + return None + + +def get_manifest_by_tag(namespace_name, repo_name, tag_name): + try: + manifest = model.tag.load_tag_manifest(namespace_name, repo_name, manifest_ref) + return ManifestJSON(digest=digest, json=manifest.json_data) + except model.InvalidManifestException: + return None + + +def get_manifest_by_digest(namespace_name, repo_name, digest): + try: + manifest = model.tag.load_manifest_by_digest(namespace_name, repo_name, manifest_ref) + return ManifestJSON(digest=digest, json=manifest.json_data) + except model.InvalidManifestException: + return None + + +def get_tag_by_manifest_digest(namespace_name, repo_name, digest): + return Tag() + + +def delete_tag(namespace_name, repo_name, tag_name): + model.tag.delete_tag(namespace_name, repo_name, tag.name) + return True + + +def docker_v1_metadata_by_tag(namespace_name, repo_name, tag_name): + if not 
repo_image: + return None + + return DockerV1Metadata( + namespace_name=namespace_name, + repo_name=repo_name, + image_id=image_id, + checksum=repo_image.v1_checksum, + content_checksum=repo_image.content_checksum, + compat_json=repo_image.v1_json_metadata, + ) + + +def docker_v1_metadata_by_image_id(namespace_name, repo_name, image_ids): + images_query = model.image.lookup_repository_images(repo, all_image_ids) + return [DockerV1Metadata( + namespace_name=namespace_name, + repo_name=repo_name, + image_id=image.docker_image_id, + checksum=image.v1_checksum, + content_checksum=image.content_checksum, + compat_json=image.v1_json_metadata, + ) for image in images_query] + + +def get_parents_docker_v1_metadata(namespace_name, repo_name, image_id): + # Old implementation: + # parents = model.image.get_parent_images(namespace_name, repo_name, image) + + # desired: + # return a list of the AttrDict in docker_v1_metadata + return [] + + +def create_manifest_and_update_tag(namespace_name, repo_name, tag_name, manifest_digest, manifest_bytes): + try: + model.tag.associate_generated_tag_manifest(namespace_name, repo_name, tag_name, + manifest.digest, manifest.bytes) + except IntegrityError: + # It's already there! 
+ pass + + +def synthesize_v1_image(repo, storage, image_id, created, comment, command, compat_json, parent_image_id): + model.image.synthesize_v1_image(repo, storage, image_id, created, comment, command, compat_json, parent_image_id) + + +def save_manifest(namespace_name, repo_name, tag_name, leaf_layer_id, manifest_digest, manifest_bytes): + model.tag.store_tag_manifest(namespace_name, repo_name, tag_name, leaf_layer_id, manifest_digest, + manifest_bytes) diff --git a/data/types.py b/data/types.py new file mode 100644 index 000000000..f734f3506 --- /dev/null +++ b/data/types.py @@ -0,0 +1,346 @@ +import json +import hashlib +import logging + +from collections import namedtuple, OrderedDict +from datetime import datetime + +from jwkest.jws import SIGNER_ALGS, keyrep +from jwt.utils import base64url_encode, base64url_decode + +from digest import digest_tools + + +logger = logging.getLogger(__name__) + + +DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v1+prettyjws' +DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.v2+json' +DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE = 'application/vnd.docker.distribution.manifest.list.v2+json' + +DOCKER_SCHEMA2_CONTENT_TYPES = [DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE, + DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE] + + +# These are used to extract backwards compatibility data from Docker Manifest Schema 1 +ExtractedLayerMetadata = namedtuple( + 'ExtractedLayerMetadata', + ['digest', 'v1_metadata', 'v1_metadata_str'] +) +ExtractedDockerV1Metadata = namedtuple( + 'ExtractedDockerV1Metadata', + ['image_id', 'parent_image_id', 'created', 'comment', 'command'] +) + + +# Constants used for Docker Manifest Schema 2.1 +_DOCKER_SCHEMA_1_SIGNATURES_KEY = 'signatures' +_DOCKER_SCHEMA_1_PROTECTED_KEY = 'protected' +_DOCKER_SCHEMA_1_FORMAT_LENGTH_KEY = 'formatLength' +_DOCKER_SCHEMA_1_FORMAT_TAIL_KEY = 'formatTail' +_DOCKER_SCHEMA_1_REPO_NAME_KEY = 'name' 
+_DOCKER_SCHEMA_1_REPO_TAG_KEY = 'tag' +_DOCKER_SCHEMA_1_FS_LAYERS_KEY = 'fsLayers' +_DOCKER_SCHEMA_1_HISTORY_KEY = 'history' +_DOCKER_SCHEMA_1_BLOB_SUM_KEY = 'blobSum' +_DOCKER_SCHEMA_1_V1_COMPAT_KEY = 'v1Compatibility' +_DOCKER_SCHEMA_1_ARCH_KEY = 'architecture' +_DOCKER_SCHEMA_1_SCHEMA_VER_KEY = 'schemaVersion' +_ISO_DATETIME_FORMAT_ZULU = '%Y-%m-%dT%H:%M:%SZ' +_JWS_ALGORITHM = 'RS256' + + +class ManifestException(Exception): + pass + + +class ManifestMalformed(ManifestException): + pass + + +class ManifestSignatureFailure(ManifestException): + pass + + +def _updated_v1_metadata(v1_metadata_json, updated_id_map): + parsed = json.loads(v1_metadata_json) + parsed['id'] = updated_id_map[parsed['id']] + + if parsed.get('parent') and parsed['parent'] in updated_id_map: + parsed['parent'] = updated_id_map[parsed['parent']] + + if parsed.get('container_config', {}).get('Image'): + existing_image = parsed['container_config']['Image'] + if existing_image in updated_id_map: + parsed['container_config']['image'] = updated_id_map[existing_image] + + return json.dumps(parsed) + + +class DockerSchema1Manifest(object): + def __init__(self, manifest_bytes, validate=True): + self._layers = None + self._bytes = manifest_bytes + + self._parsed = json.loads(manifest_bytes) + self._signatures = self._parsed[_DOCKER_SCHEMA_1_SIGNATURES_KEY] + self._tag = self._parsed[_DOCKER_SCHEMA_1_REPO_TAG_KEY] + + repo_name_tuple = self._parsed[_DOCKER_SCHEMA_1_REPO_NAME_KEY].split('/') + if len(repo_name_tuple) > 1: + self._namespace, self._repo_name = repo_name_tuple + elif len(repo_name_tuple) == 1: + self._namespace = '' + self._repo_name = repo_name_tuple[0] + else: + raise ManifestMalformed('malformed repository name') + + if validate: + self._validate() + + def _validate(self): + for signature in self._signatures: + bytes_to_verify = '{0}.{1}'.format(signature['protected'], + base64url_encode(self.payload)) + signer = SIGNER_ALGS[signature['header']['alg']] + key = 
keyrep(signature['header']['jwk']) + gk = key.get_key() + sig = base64url_decode(signature['signature'].encode('utf-8')) + verified = signer.verify(bytes_to_verify, sig, gk) + if not verified: + raise ManifestSignatureFailure() + + @property + def signatures(self): + return self._signatures + + @property + def namespace(self): + return self._namespace + + @property + def repo_name(self): + return self._repo_name + + @property + def tag(self): + return self._tag + + @property + def bytes(self): + return self._bytes + + @property + def manifest_json(self): + return self._parsed + + @property + def digest(self): + return digest_tools.sha256_digest(self.payload) + + @property + def image_ids(self): + return {mdata.v1_metadata.image_id for mdata in self.layers} + + @property + def parent_image_ids(self): + return {mdata.v1_metadata.parent_image_id for mdata in self.layers + if mdata.v1_metadata.parent_image_id} + + @property + def checksums(self): + return list({str(mdata.digest) for mdata in self.layers}) + + @property + def layers(self): + if self._layers is None: + self._layers = list(self._generate_layers()) + return self._layers + + def _generate_layers(self): + """ Returns a generator of objects that have the blobSum and v1Compatibility keys in them, + starting from the base image and working toward the leaf node. 
+ """ + for blob_sum_obj, history_obj in reversed(zip(self._parsed[_DOCKER_SCHEMA_1_FS_LAYERS_KEY], + self._parsed[_DOCKER_SCHEMA_1_HISTORY_KEY])): + + try: + image_digest = digest_tools.Digest.parse_digest(blob_sum_obj[_DOCKER_SCHEMA_1_BLOB_SUM_KEY]) + except digest_tools.InvalidDigestException: + raise ManifestMalformed('could not parse manifest digest: %s' % + blob_sum_obj[_DOCKER_SCHEMA_1_BLOB_SUM_KEY]) + + metadata_string = history_obj[_DOCKER_SCHEMA_1_V1_COMPAT_KEY] + + v1_metadata = json.loads(metadata_string) + command_list = v1_metadata.get('container_config', {}).get('Cmd', None) + command = json.dumps(command_list) if command_list else None + + if not 'id' in v1_metadata: + raise ManifestMalformed('invalid manifest v1 history') + + extracted = ExtractedDockerV1Metadata(v1_metadata['id'], v1_metadata.get('parent'), + v1_metadata.get('created'), v1_metadata.get('comment'), + command) + yield ExtractedLayerMetadata(image_digest, extracted, metadata_string) + + @property + def payload(self): + protected = str(self._signatures[0][_DOCKER_SCHEMA_1_PROTECTED_KEY]) + parsed_protected = json.loads(base64url_decode(protected)) + signed_content_head = self._bytes[:parsed_protected[_DOCKER_SCHEMA_1_FORMAT_LENGTH_KEY]] + signed_content_tail = base64url_decode(str(parsed_protected[_DOCKER_SCHEMA_1_FORMAT_TAIL_KEY])) + return signed_content_head + signed_content_tail + + def rewrite_invalid_image_ids(self, images_map): + """ + Rewrites Docker v1 image IDs and returns a generator of DockerV1Metadata. + + If Docker gives us a layer with a v1 image ID that already points to existing + content, but the checksums don't match, then we need to rewrite the image ID + to something new in order to ensure consistency. 
+ """ + # used to synthesize a new "content addressable" image id + digest_history = hashlib.sha256() + + has_rewritten_ids = False + updated_id_map = {} + for extracted_layer_metadata in self.layers: + digest_str = str(extracted_layer_metadata.digest) + extracted_v1_metadata = extracted_layer_metadata.v1_metadata + working_image_id = extracted_v1_metadata.image_id + + # Update our digest_history hash for the new layer data. + digest_history.update(digest_str) + digest_history.update("@") + digest_history.update(extracted_layer_metadata.v1_metadata_str.encode('utf-8')) + digest_history.update("|") + + # Ensure that the v1 image's storage matches the V2 blob. If not, we've + # found a data inconsistency and need to create a new layer ID for the V1 + # image, and all images that follow it in the ancestry chain. + digest_mismatch = (extracted_v1_metadata.image_id in images_map and + images_map[extracted_v1_metadata.image_id].content_checksum != digest_str) + if digest_mismatch or has_rewritten_ids: + working_image_id = digest_history.hexdigest() + has_rewritten_ids = True + + # Store the new docker id in the map + updated_id_map[extracted_v1_metadata.image_id] = working_image_id + + # Lookup the parent image for the layer, if any. + parent_image_id = None + if extracted_v1_metadata.parent_image_id is not None: + parent_image_id = images_map.get(extracted_v1_metadata.parent_image_id, None) + if parent_image_id is None: + raise ManifestMalformed( + 'Parent not found with image ID: {0}'.format(extracted_v1_metadata.parent_image_id) + ) + + # Synthesize and store the v1 metadata in the db. 
+ v1_metadata_json = extracted_layer_metadata.v1_metadata_str + if has_rewritten_ids: + v1_metadata_json = _updated_v1_metadata(v1_metadata_json, updated_id_map) + + yield DockerV1Metadata( + image_id=working_image_id, + created=extracted_v1_metadata.created, + comment=extracted_v1_metadata.comment, + command=extracted_v1_metadata.command, + compat_json=v1_metadata_json, + parent_image_id=parent_image_id, + ) + + +class DockerSchema1ManifestBuilder(object): + """ Class which represents a manifest which is currently being built. """ + def __init__(self, namespace_name, repo_name, tag, architecture='amd64'): + repo_name_key = '{0}/{1}'.format(namespace_name, repo_name) + if namespace_name == '': + repo_name_key = repo_name + + self._base_payload = { + _DOCKER_SCHEMA_1_REPO_TAG_KEY: tag, + _DOCKER_SCHEMA_1_REPO_NAME_KEY: repo_name_key, + _DOCKER_SCHEMA_1_ARCH_KEY: architecture, + _DOCKER_SCHEMA_1_SCHEMA_VER_KEY: 1, + } + + self._fs_layer_digests = [] + self._history = [] + + def add_layer(self, layer_digest, v1_json_metadata): + self._fs_layer_digests.append({ + _DOCKER_SCHEMA_1_BLOB_SUM_KEY: layer_digest, + }) + self._history.append({ + _DOCKER_SCHEMA_1_V1_COMPAT_KEY: v1_json_metadata, + }) + return self + + + def build(self, json_web_key): + """ Build the payload and sign it, returning a SignedManifest object. 
+ """ + payload = OrderedDict(self._base_payload) + payload.update({ + _DOCKER_SCHEMA_1_HISTORY_KEY: self._history, + _DOCKER_SCHEMA_1_FS_LAYERS_KEY: self._fs_layer_digests, + }) + + payload_str = json.dumps(payload, indent=3) + + split_point = payload_str.rfind('\n}') + + protected_payload = { + 'formatTail': base64url_encode(payload_str[split_point:]), + 'formatLength': split_point, + 'time': datetime.utcnow().strftime(_ISO_DATETIME_FORMAT_ZULU), + } + protected = base64url_encode(json.dumps(protected_payload)) + logger.debug('Generated protected block: %s', protected) + + bytes_to_sign = '{0}.{1}'.format(protected, base64url_encode(payload_str)) + + signer = SIGNER_ALGS[_JWS_ALGORITHM] + signature = base64url_encode(signer.sign(bytes_to_sign, json_web_key.get_key())) + logger.debug('Generated signature: %s', signature) + + public_members = set(json_web_key.public_members) + public_key = {comp: value for comp, value in json_web_key.to_dict().items() + if comp in public_members} + + signature_block = { + 'header': { + 'jwk': public_key, + 'alg': _JWS_ALGORITHM, + }, + 'signature': signature, + _DOCKER_SCHEMA_1_PROTECTED_KEY: protected, + } + + logger.debug('Encoded signature block: %s', json.dumps(signature_block)) + + payload.update({ + _DOCKER_SCHEMA_1_SIGNATURES_KEY: [signature_block], + }) + + return DockerSchema1Manifest(json.dumps(payload, indent=3)) + + +Repository = namedtuple('Repository', ['id', 'name', 'namespace_name']) + +Tag = namedtuple('Tag', ['name', 'repository']) + +ManifestJSON = namedtuple('ManifestJSON', ['digest', 'json']) + +DockerV1Metadata = namedtuple('DockerV1Metadata', ['namespace_name', + 'repo_name', + 'image_id', + 'checksum', + 'content_checksum', + 'created', + 'comment', + 'command', + 'parent_image_id', + 'compat_json']) diff --git a/endpoints/v2/manifest.py b/endpoints/v2/manifest.py index b1302d1b8..cb92b1ebe 100644 --- a/endpoints/v2/manifest.py +++ b/endpoints/v2/manifest.py @@ -1,34 +1,31 @@ import logging -import json 
-import hashlib -from collections import namedtuple, OrderedDict -from datetime import datetime from functools import wraps -import jwt.utils - -from peewee import IntegrityError from flask import make_response, request, url_for -from jwkest.jws import SIGNER_ALGS, keyrep import features from app import docker_v2_signing_key, app, metric_queue from auth.registry_jwt_auth import process_registry_jwt_auth +from data import model +from data.types import ( + DockerSchema1Manifest, + DockerSchema1ManifestBuilder, + ManifestException, + DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE, + DOCKER_SCHEMA2_CONTENT_TYPES, +) +from digest import digest_tools from endpoints.common import parse_repository_name from endpoints.decorators import anon_protect from endpoints.v2 import v2_bp, require_repo_read, require_repo_write from endpoints.v2.errors import (BlobUnknown, ManifestInvalid, ManifestUnknown, TagInvalid, - NameInvalid, TagAlreadyExists) + NameInvalid) from endpoints.trackhelper import track_and_log from endpoints.notificationhelper import spawn_notification from util.registry.replication import queue_storage_replication from util.names import VALID_TAG_PATTERN -from digest import digest_tools -from data import model -from data.model import TagAlreadyCreatedException -from data.database import RepositoryTag logger = logging.getLogger(__name__) @@ -37,234 +34,29 @@ BASE_MANIFEST_ROUTE = '//manifests/ 1: - self._namespace, self._repo_name = repo_name_tuple - elif len(repo_name_tuple) == 1: - self._namespace = '' - self._repo_name = repo_name_tuple[0] - else: - raise ValueError('repo_name has too many or too few pieces') - - if validate: - self._validate() - - def _validate(self): - for signature in self._signatures: - bytes_to_verify = '{0}.{1}'.format(signature['protected'], - jwt.utils.base64url_encode(self.payload)) - signer = SIGNER_ALGS[signature['header']['alg']] - key = keyrep(signature['header']['jwk']) - gk = key.get_key() - sig = 
jwt.utils.base64url_decode(signature['signature'].encode('utf-8')) - verified = signer.verify(bytes_to_verify, sig, gk) - if not verified: - raise ValueError('manifest file failed signature verification') - - @property - def signatures(self): - return self._signatures - - @property - def namespace(self): - return self._namespace - - @property - def repo_name(self): - return self._repo_name - - @property - def tag(self): - return self._tag - - @property - def bytes(self): - return self._bytes - - @property - def digest(self): - return digest_tools.sha256_digest(self.payload) - - @property - def layers(self): - """ Returns a generator of objects that have the blobSum and v1Compatibility keys in them, - starting from the base image and working toward the leaf node. - """ - for blob_sum_obj, history_obj in reversed(zip(self._parsed[_FS_LAYERS_KEY], - self._parsed[_HISTORY_KEY])): - - try: - image_digest = digest_tools.Digest.parse_digest(blob_sum_obj[_BLOB_SUM_KEY]) - except digest_tools.InvalidDigestException: - err_message = 'could not parse manifest digest: %s' % blob_sum_obj[_BLOB_SUM_KEY] - raise ManifestInvalid(detail={'message': err_message}) - - metadata_string = history_obj[_V1_COMPAT_KEY] - - v1_metadata = json.loads(metadata_string) - command_list = v1_metadata.get('container_config', {}).get('Cmd', None) - command = json.dumps(command_list) if command_list else None - - if not 'id' in v1_metadata: - raise ManifestInvalid(detail={'message': 'invalid manifest v1 history'}) - - labels = v1_metadata.get('config', {}).get('Labels', {}) or {} - extracted = ExtractedV1Metadata(v1_metadata['id'], v1_metadata.get('parent'), - v1_metadata.get('created'), v1_metadata.get('comment'), - command, labels) - yield ImageMetadata(image_digest, extracted, metadata_string) - - @property - def payload(self): - protected = str(self._signatures[0][_PROTECTED_KEY]) - parsed_protected = json.loads(jwt.utils.base64url_decode(protected)) - signed_content_head = 
self._bytes[:parsed_protected[_FORMAT_LENGTH_KEY]] - signed_content_tail = jwt.utils.base64url_decode(str(parsed_protected[_FORMAT_TAIL_KEY])) - return signed_content_head + signed_content_tail - - -class SignedManifestBuilder(object): - """ Class which represents a manifest which is currently being built. - """ - def __init__(self, namespace_name, repo_name, tag, architecture='amd64', schema_ver=1): - repo_name_key = '{0}/{1}'.format(namespace_name, repo_name) - if namespace_name == '': - repo_name_key = repo_name - - self._base_payload = { - _REPO_TAG_KEY: tag, - _REPO_NAME_KEY: repo_name_key, - _ARCH_KEY: architecture, - _SCHEMA_VER: schema_ver, - } - - self._fs_layer_digests = [] - self._history = [] - - def add_layer(self, layer_digest, v1_json_metadata): - self._fs_layer_digests.append({ - _BLOB_SUM_KEY: layer_digest, - }) - self._history.append({ - _V1_COMPAT_KEY: v1_json_metadata, - }) - return self - - - def build(self, json_web_key): - """ Build the payload and sign it, returning a SignedManifest object. 
- """ - payload = OrderedDict(self._base_payload) - payload.update({ - _HISTORY_KEY: self._history, - _FS_LAYERS_KEY: self._fs_layer_digests, - }) - - payload_str = json.dumps(payload, indent=3) - - split_point = payload_str.rfind('\n}') - - protected_payload = { - 'formatTail': jwt.utils.base64url_encode(payload_str[split_point:]), - 'formatLength': split_point, - 'time': datetime.utcnow().strftime(ISO_DATETIME_FORMAT_ZULU), - } - protected = jwt.utils.base64url_encode(json.dumps(protected_payload)) - logger.debug('Generated protected block: %s', protected) - - bytes_to_sign = '{0}.{1}'.format(protected, jwt.utils.base64url_encode(payload_str)) - - signer = SIGNER_ALGS[JWS_ALGORITHM] - signature = jwt.utils.base64url_encode(signer.sign(bytes_to_sign, json_web_key.get_key())) - logger.debug('Generated signature: %s', signature) - - public_members = set(json_web_key.public_members) - public_key = {comp: value for comp, value in json_web_key.to_dict().items() - if comp in public_members} - - signature_block = { - 'header': { - 'jwk': public_key, - 'alg': JWS_ALGORITHM, - }, - 'signature': signature, - _PROTECTED_KEY: protected, - } - - logger.debug('Encoded signature block: %s', json.dumps(signature_block)) - - payload.update({ - _SIGNATURES_KEY: [signature_block], - }) - - return SignedManifest(json.dumps(payload, indent=3)) - - @v2_bp.route(MANIFEST_TAGNAME_ROUTE, methods=['GET']) @parse_repository_name() @process_registry_jwt_auth(scopes=['pull']) @require_repo_read @anon_protect -def fetch_manifest_by_tagname(namespace_name, repo_name, manifest_ref): - try: - manifest = model.tag.load_tag_manifest(namespace_name, repo_name, manifest_ref) - except model.InvalidManifestException: - try: - model.tag.get_active_tag(namespace_name, repo_name, manifest_ref) - except RepositoryTag.DoesNotExist: +def fetch_manifest_by_tagname(namespace_name, repo_name, tag_name): + manifest = v2.get_manifest_by_tag(namespace_name, repo_name, tag_name) + if manifest is None: + tag = 
v2.get_active_tag(namespace_name, repo_name, tag_name) + if tag is None: raise ManifestUnknown() - try: - manifest = _generate_and_store_manifest(namespace_name, repo_name, manifest_ref) - except model.DataModelException: - logger.exception('Exception when generating manifest for %s/%s:%s', namespace_name, repo_name, - manifest_ref) + manifest = _generate_and_store_manifest(namespace_name, repo_name, tag_name) + if manifest is None: raise ManifestUnknown() - repo = model.repository.get_repository(namespace_name, repo_name) + repo = v2.get_repository(namespace_name, repo_name) if repo is not None: track_and_log('pull_repo', repo, analytics_name='pull_repo_100x', analytics_sample=0.01) metric_queue.repository_pull.Inc(labelvalues=[namespace_name, repo_name, 'v2']) - response = make_response(manifest.json_data, 200) - response.headers['Content-Type'] = MANIFEST_CONTENT_TYPE + response = make_response(manifest.bytes, 200) + response.headers['Content-Type'] = DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE response.headers['Docker-Content-Digest'] = manifest.digest return response @@ -275,19 +67,18 @@ def fetch_manifest_by_tagname(namespace_name, repo_name, manifest_ref): @require_repo_read @anon_protect def fetch_manifest_by_digest(namespace_name, repo_name, manifest_ref): - try: - manifest = model.tag.load_manifest_by_digest(namespace_name, repo_name, manifest_ref) - except model.InvalidManifestException: + manifest = model.tag.load_manifest_by_digest(namespace_name, repo_name, manifest_ref) + if manifest is None: # Without a tag name to reference, we can't make an attempt to generate the manifest raise ManifestUnknown() - repo = model.repository.get_repository(namespace_name, repo_name) + repo = v2.get_repository(namespace_name, repo_name) if repo is not None: track_and_log('pull_repo', repo) metric_queue.repository_pull.Inc(labelvalues=[namespace_name, repo_name, 'v2']) - response = make_response(manifest.json_data, 200) - response.headers['Content-Type'] = 
MANIFEST_CONTENT_TYPE + response = make_response(manifest.json, 200) + response.headers['Content-Type'] = DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE response.headers['Docker-Content-Digest'] = manifest.digest return response @@ -295,7 +86,7 @@ def fetch_manifest_by_digest(namespace_name, repo_name, manifest_ref): def _reject_manifest2_schema2(func): @wraps(func) def wrapped(*args, **kwargs): - if request.content_type in MANIFEST2_SCHEMA2_CONTENT_TYPES: + if request.content_type in DOCKER_SCHEMA2_CONTENT_TYPES: raise ManifestInvalid(detail={'message': 'manifest schema version not supported'}, http_status_code=415) return func(*args, **kwargs) @@ -308,13 +99,13 @@ def _reject_manifest2_schema2(func): @process_registry_jwt_auth(scopes=['pull', 'push']) @require_repo_write @anon_protect -def write_manifest_by_tagname(namespace_name, repo_name, manifest_ref): +def write_manifest_by_tagname(namespace_name, repo_name, tag_name): try: - manifest = SignedManifest(request.data) - except ValueError: - raise ManifestInvalid(detail={'message': 'could not parse manifest'}) + manifest = DockerSchema1Manifest(request.data) + except ManifestException as me: + raise ManifestInvalid(detail={'message': me.message}) - if manifest.tag != manifest_ref: + if manifest.tag != tag_name: raise TagInvalid() return _write_manifest(namespace_name, repo_name, manifest) @@ -326,39 +117,22 @@ def write_manifest_by_tagname(namespace_name, repo_name, manifest_ref): @process_registry_jwt_auth(scopes=['pull', 'push']) @require_repo_write @anon_protect -def write_manifest_by_digest(namespace_name, repo_name, manifest_ref): +def write_manifest_by_digest(namespace_name, repo_name, digest): try: - manifest = SignedManifest(request.data) - except ValueError: - raise ManifestInvalid(detail={'message': 'could not parse manifest'}) + manifest = DockerSchema1Manifest(request.data) + except ManifestException as me: + raise ManifestInvalid(detail={'message': me.message}) - if manifest.digest != manifest_ref: + if 
manifest.digest != digest: raise ManifestInvalid(detail={'message': 'manifest digest mismatch'}) return _write_manifest(namespace_name, repo_name, manifest) -def _updated_v1_metadata(v1_metadata_json, updated_id_map): - parsed = json.loads(v1_metadata_json) - parsed['id'] = updated_id_map[parsed['id']] - - if parsed.get('parent') and parsed['parent'] in updated_id_map: - parsed['parent'] = updated_id_map[parsed['parent']] - - if parsed.get('container_config', {}).get('Image'): - existing_image = parsed['container_config']['Image'] - if existing_image in updated_id_map: - parsed['container_config']['image'] = updated_id_map[existing_image] - - return json.dumps(parsed) - - -def _write_manifest_itself(namespace_name, repo_name, manifest): - # Ensure that the manifest is for this repository. If the manifest's namespace is empty, then - # it is for the library namespace and we need an extra check. - if (manifest.namespace == '' and features.LIBRARY_SUPPORT and +def _write_manifest(namespace_name, repo_name, manifest): + if (manifest.namespace == '' and + features.LIBRARY_SUPPORT and namespace_name == app.config['LIBRARY_NAMESPACE']): - # This is a library manifest. All good. pass elif manifest.namespace != namespace_name: raise NameInvalid() @@ -367,135 +141,65 @@ def _write_manifest_itself(namespace_name, repo_name, manifest): raise NameInvalid() # Ensure that the repository exists. - repo = model.repository.get_repository(namespace_name, repo_name) + repo = v2.get_repository(namespace_name, repo_name) if repo is None: raise NameInvalid() - # Lookup all the images and their parent images (if any) inside the manifest. This will let us - # know which V1 images we need to synthesize and which ones are invalid. 
- layers = list(manifest.layers) - - docker_image_ids = {mdata.v1_metadata.docker_id for mdata in layers} - parent_image_ids = {mdata.v1_metadata.parent for mdata in layers - if mdata.v1_metadata.parent} - all_image_ids = list(docker_image_ids | parent_image_ids) - - images_query = model.image.lookup_repository_images(repo, all_image_ids) - images_map = {image.docker_image_id: image for image in images_query} - - # Lookup the storages associated with each blob in the manifest. - checksums = list({str(mdata.digest) for mdata in manifest.layers}) - storage_query = model.storage.lookup_repo_storages_by_content_checksum(repo, checksums) - storage_map = {storage.content_checksum: storage for storage in storage_query} - - # Ensure that we have valid V1 docker IDs. If Docker gives us a V1 layer ID pointing to - # a storage with a content checksum different from the existing, then we need to rewrite - # the Docker ID to ensure consistency. - tag_name = manifest.tag - has_rewritten_ids = False - updated_id_map = {} - - # Synthesized image id hash. Can be used to pull a "content addressable" image id out of thin air. - digest_history = hashlib.sha256() - - for mdata in layers: - digest_str = str(mdata.digest) - v1_mdata = mdata.v1_metadata - working_docker_id = v1_mdata.docker_id - - # Update our digest_history hash for the new layer data. - digest_history.update(digest_str) - digest_history.update("@") - digest_history.update(mdata.v1_metadata_str.encode('utf-8')) - digest_history.update("|") - - # Ensure that all blobs exist. - blob_storage = storage_map.get(digest_str) - if blob_storage is None: - raise BlobUnknown(detail={'digest': digest_str}) - - # Ensure that the V1 image's storage matches the V2 blob. If not, we've found - # a data inconsistency and need to create a new layer ID for the V1 image, and all images - # that follow it in the ancestry chain. 
- if ((v1_mdata.docker_id in images_map and - images_map[v1_mdata.docker_id].storage.content_checksum != digest_str) or - has_rewritten_ids): - - working_docker_id = digest_history.hexdigest() - logger.warning('Rewriting docker_id %s/%s %s -> %s', namespace_name, repo_name, - v1_mdata.docker_id, working_docker_id) - has_rewritten_ids = True - - # Store the new docker id in the map - updated_id_map[v1_mdata.docker_id] = working_docker_id - - # Lookup the parent image for the layer, if any. - parent_image = None - if v1_mdata.parent is not None: - parent_image = images_map.get(v1_mdata.parent) - if parent_image is None: - msg = 'Parent not found with docker image id {0}'.format(v1_mdata.parent) - raise ManifestInvalid(detail={'message': msg}) - - # Synthesize and store the v1 metadata in the db. - v1_metadata_json = mdata.v1_metadata_str - if has_rewritten_ids: - v1_metadata_json = _updated_v1_metadata(mdata.v1_metadata_str, updated_id_map) - - image = model.image.synthesize_v1_image(repo, blob_storage, working_docker_id, - v1_mdata.created, v1_mdata.comment, v1_mdata.command, - v1_metadata_json, parent_image) - images_map[v1_mdata.docker_id] = image - - if not layers: - # The manifest doesn't actually reference any layers! + if not manifest.layers: raise ManifestInvalid(detail={'message': 'manifest does not reference any layers'}) - # Store the manifest pointing to the tag. - manifest_digest = manifest.digest - leaf_layer_id = images_map[layers[-1].v1_metadata.docker_id].docker_image_id + # Ensure all the blobs in the manifest exist. 
+ storage_query = model.storage.lookup_repo_storages_by_content_checksum(repo, manifest.checksums) + storage_map = {storage.content_checksum: storage for storage in storage_query} + for extracted_layer_metadata in manifest.layers: + digest_str = str(extracted_layer_metadata.digest) + if digest_str not in storage_map: + raise BlobUnknown(detail={'digest': digest_str}) + # Lookup all the images and their parent images (if any) inside the manifest. + # This will let us know which v1 images we need to synthesize and which ones are invalid. + all_image_ids = list(manifest.docker_image_ids | manifest.parent_image_ids) + images = v2.docker_v1_metadata_by_image_id(namespace_name, repo_name, all_image_ids) + images_map = {image.image_id: image for image in images} + + # Rewrite any v1 image IDs that do not match the checksum in the database. try: - tag_manifest, manifest_created = model.tag.store_tag_manifest(namespace_name, repo_name, - tag_name, leaf_layer_id, - manifest_digest, manifest.bytes) - except TagAlreadyCreatedException: - logger.warning('Tag %s was already created under repository %s/%s pointing to image %s', - tag_name, namespace_name, repo_name, leaf_layer_id) - raise TagAlreadyExists() + rewritten_images = manifest.rewrite_invalid_image_ids(images_map) + for rewritten_image in rewritten_images: + image = v2.synthesize_v1_image( + repo, + storage_map[rewritten_image.content_checksum], + rewritten_image.image_id, + rewritten_image.created, + rewritten_image.comment, + rewritten_image.command, + rewritten_image.compat_json, + rewritten_image.parent_image_id, + ) + images_map[image.image_id] = image + except ManifestException as me: + raise ManifestInvalid(detail={'message': me.message}) - if manifest_created: - for key, value in layers[-1].v1_metadata.labels.iteritems(): - model.label.create_manifest_label(tag_manifest, key, value, 'manifest') + # Store the manifest pointing to the tag. 
+ leaf_layer_id = images_map[manifest.layers[-1].v1_metadata.image_id].image_id + v2.save_manifest(namespace_name, repo_name, tag_name, leaf_layer_id, manifest.digest, manifest.bytes) # Queue all blob manifests for replication. # TODO(jschorr): Find a way to optimize this insertion. if features.STORAGE_REPLICATION: - for mdata in layers: - digest_str = str(mdata.digest) - blob_storage = storage_map.get(digest_str) - queue_storage_replication(namespace_name, blob_storage) + for extracted_v1_metadata in manifest.layers: + digest_str = str(extracted_v1_metadata.digest) + queue_storage_replication(namespace_name, storage_map[digest_str]) - return (repo, tag_name, manifest_digest) - - -def _write_manifest(namespace_name, repo_name, manifest): - (repo, tag_name, manifest_digest) = _write_manifest_itself(namespace_name, repo_name, manifest) - - # Spawn the repo_push event. - event_data = { - 'updated_tags': [tag_name], - } - - track_and_log('push_repo', repo, tag=tag_name) - spawn_notification(repo, 'repo_push', event_data) + track_and_log('push_repo', repo, tag=manifest.tag) + spawn_notification(repo, 'repo_push', {'updated_tags': [manifest.tag]}) metric_queue.repository_push.Inc(labelvalues=[namespace_name, repo_name, 'v2']) response = make_response('OK', 202) - response.headers['Docker-Content-Digest'] = manifest_digest + response.headers['Docker-Content-Digest'] = manifest.digest response.headers['Location'] = url_for('v2.fetch_manifest_by_digest', repository='%s/%s' % (namespace_name, repo_name), - manifest_ref=manifest_digest) + manifest_ref=manifest.digest) return response @@ -504,33 +208,34 @@ def _write_manifest(namespace_name, repo_name, manifest): @process_registry_jwt_auth(scopes=['pull', 'push']) @require_repo_write @anon_protect -def delete_manifest_by_digest(namespace_name, repo_name, manifest_ref): - """ Delete the manifest specified by the digest. Note: there is no equivalent - method for deleting by tag name because it is forbidden by the spec. 
+def delete_manifest_by_digest(namespace_name, repo_name, digest): """ - try: - manifest = model.tag.load_manifest_by_digest(namespace_name, repo_name, manifest_ref) - except model.InvalidManifestException: - # Without a tag name to reference, we can't make an attempt to generate the manifest + Delete the manifest specified by the digest. + + Note: there is no equivalent method for deleting by tag name because it is + forbidden by the spec. + """ + tag = v2.get_tag_by_manifest_digest(namespace_name, repo_name, digest) + if tag is None: + # TODO(jzelinskie): disambiguate between no manifest and no tag raise ManifestUnknown() # Mark the tag as no longer alive. - try: - model.tag.delete_tag(namespace_name, repo_name, manifest.tag.name) - except model.DataModelException: - # Tag is not alive. + deleted = v2.delete_tag(namespace_name, repo_name, tag.name) + if not deleted: + # Tag was not alive. raise ManifestUnknown() - track_and_log('delete_tag', manifest.tag.repository, - tag=manifest.tag.name, digest=manifest_ref) + track_and_log('delete_tag', tag.repository, tag=tag.name, digest=digest) return make_response('', 202) def _generate_and_store_manifest(namespace_name, repo_name, tag_name): - # First look up the tag object and its ancestors - image = model.tag.get_tag_image(namespace_name, repo_name, tag_name, include_storage=True) - parents = model.image.get_parent_images(namespace_name, repo_name, image) + # Find the v1 metadata for this image and its parents. + v1_metadata = v2.docker_v1_metadata_by_tag(namespace_name, repo_name, tag_name) + parents_v1_metadata = v2.get_parents_docker_v1_metadata(namespace_name, repo_name, + v1_metadata.image_id) # If the manifest is being generated under the library namespace, then we make its namespace # empty. 
@@ -539,26 +244,18 @@ def _generate_and_store_manifest(namespace_name, repo_name, tag_name): manifest_namespace = '' # Create and populate the manifest builder - builder = SignedManifestBuilder(manifest_namespace, repo_name, tag_name) + builder = DockerSchema1ManifestBuilder(manifest_namespace, repo_name, tag_name) # Add the leaf layer - builder.add_layer(image.storage.content_checksum, image.v1_json_metadata) + builder.add_layer(v1_metadata.content_checksum, v1_metadata.compat_json) - for parent in parents: - builder.add_layer(parent.storage.content_checksum, parent.v1_json_metadata) + for parent_v1_metadata in parents_v1_metadata: + builder.add_layer(parent_v1_metadata.content_checksum, parent_v1_metadata.compat_json) # Sign the manifest with our signing key. manifest = builder.build(docker_v2_signing_key) - # Write the manifest to the DB. If an existing manifest already exists, return the - # one found. - try: - return model.tag.associate_generated_tag_manifest(namespace_name, repo_name, tag_name, - manifest.digest, manifest.bytes) - except IntegrityError as ie: - logger.debug('Got integrity error: %s', ie) - try: - return model.tag.load_tag_manifest(namespace_name, repo_name, tag_name) - except model.InvalidManifestException: - logger.exception('Exception when generating manifest') - raise model.DataModelException('Could not load or generate manifest') + # Write the manifest to the DB. + v2.create_manifest_and_update_tag(namespace_name, repo_name, tag_name, manifest.digest, + manifest.bytes) + return manifest