diff --git a/data/model/oci/manifest.py b/data/model/oci/manifest.py index cf1e573aa..299f86b8b 100644 --- a/data/model/oci/manifest.py +++ b/data/model/oci/manifest.py @@ -149,7 +149,7 @@ def _create_manifest(repository_id, manifest_interface_instance, storage): manifest = Manifest.create(repository=repository_id, digest=manifest_interface_instance.digest, media_type=media_type, - manifest_bytes=manifest_interface_instance.bytes) + manifest_bytes=manifest_interface_instance.bytes.as_encoded_str()) except IntegrityError: manifest = Manifest.get(repository=repository_id, digest=manifest_interface_instance.digest) return CreatedManifest(manifest=manifest, newly_created=False, labels_to_apply=None) diff --git a/data/model/oci/tag.py b/data/model/oci/tag.py index 622e586a8..178747081 100644 --- a/data/model/oci/tag.py +++ b/data/model/oci/tag.py @@ -11,6 +11,7 @@ from data.model.oci.shared import get_legacy_image_for_manifest from data.model import config from image.docker.schema1 import (DOCKER_SCHEMA1_CONTENT_TYPES, DockerSchema1Manifest, MalformedSchema1Manifest) +from util.bytes import Bytes from util.timedeltastring import convert_to_timedelta logger = logging.getLogger(__name__) @@ -215,7 +216,8 @@ def retarget_tag(tag_name, manifest_id, is_reversion=False, now_ms=None): # name. if manifest.media_type.name in DOCKER_SCHEMA1_CONTENT_TYPES: try: - parsed = DockerSchema1Manifest(manifest.manifest_bytes, validate=False) + parsed = DockerSchema1Manifest(Bytes.for_string_or_unicode(manifest.manifest_bytes), + validate=False) if parsed.tag != tag_name: logger.error('Tried to re-target schema1 manifest with tag `%s` to tag `%s', parsed.tag, tag_name) diff --git a/data/model/oci/test/test_oci_manifest.py b/data/model/oci/test/test_oci_manifest.py index 1e14264db..68e3b4b69 100644 --- a/data/model/oci/test/test_oci_manifest.py +++ b/data/model/oci/test/test_oci_manifest.py @@ -18,6 +18,7 @@ from data.model.storage import get_layer_path from image.docker.schema1 import DockerSchema1ManifestBuilder, DockerSchema1Manifest from image.docker.schema2.manifest import DockerSchema2ManifestBuilder from image.docker.schema2.list import DockerSchema2ManifestListBuilder +from util.bytes import Bytes from test.fixtures import * @@ -163,7 +164,7 @@ def test_get_or_create_manifest(schema_version, initialized_db): assert created is not None assert created.media_type.name == sample_manifest_instance.media_type assert created.digest == sample_manifest_instance.digest - assert created.manifest_bytes == sample_manifest_instance.bytes + assert created.manifest_bytes == sample_manifest_instance.bytes.as_encoded_str() assert created_manifest.labels_to_apply == expected_labels # Verify the legacy image. @@ -199,7 +200,8 @@ def test_get_or_create_manifest_invalid_image(initialized_db): repository = get_repository('devtable', 'simple') latest_tag = get_tag(repository, 'latest') - parsed = DockerSchema1Manifest(latest_tag.manifest.manifest_bytes, validate=False) + parsed = DockerSchema1Manifest(Bytes.for_string_or_unicode(latest_tag.manifest.manifest_bytes), + validate=False) builder = DockerSchema1ManifestBuilder('devtable', 'simple', 'anothertag') builder.add_layer(parsed.blob_digests[0], '{"id": "foo", "parent": "someinvalidimageid"}') diff --git a/data/model/tag.py b/data/model/tag.py index 068641e8a..b3ed24adb 100644 --- a/data/model/tag.py +++ b/data/model/tag.py @@ -793,7 +793,8 @@ def populate_manifest(repository, manifest, legacy_image, storage_ids): with db_transaction(): try: manifest_row = Manifest.create(digest=manifest.digest, repository=repository, - manifest_bytes=manifest.bytes, media_type=media_type) + manifest_bytes=manifest.bytes.as_encoded_str(), + media_type=media_type) except IntegrityError: return Manifest.get(repository=repository, digest=manifest.digest) diff --git a/data/model/test/test_tag.py b/data/model/test/test_tag.py index 731a6d7fe..2f5adf773 100644 --- a/data/model/test/test_tag.py +++ b/data/model/test/test_tag.py @@ -325,7 +325,7 @@ def test_store_tag_manifest(get_storages, initialized_db): mapping_row = TagManifestToManifest.get(tag_manifest=tag_manifest) assert mapping_row.manifest is not None - assert mapping_row.manifest.manifest_bytes == manifest.bytes + assert mapping_row.manifest.manifest_bytes == manifest.bytes.as_encoded_str() assert mapping_row.manifest.digest == str(manifest.digest) blob_rows = {m.blob_id for m in diff --git a/data/registry_model/datatypes.py b/data/registry_model/datatypes.py index 95e64ea20..bb64da471 100644 --- a/data/registry_model/datatypes.py +++ b/data/registry_model/datatypes.py @@ -11,6 +11,7 @@ from image.docker import ManifestException from image.docker.schemas import parse_manifest_from_bytes from image.docker.schema1 import DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE from image.docker.schema2 import DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE +from util.bytes import Bytes class RepositoryReference(datatype('Repository', [])): @@ -176,7 +177,7 @@ class Tag(datatype('Tag', ['name', 'reversion', 'manifest_digest', 'lifetime_sta return self._db_id -class Manifest(datatype('Manifest', ['digest', 'media_type', 'manifest_bytes'])): +class Manifest(datatype('Manifest', ['digest', 'media_type', 'internal_manifest_bytes'])): """ Manifest represents a manifest in a repository. """ @classmethod def for_tag_manifest(cls, tag_manifest, legacy_image=None): @@ -184,7 +185,7 @@ class Manifest(datatype('Manifest', ['digest', 'media_type', 'manifest_bytes'])) return None return Manifest(db_id=tag_manifest.id, digest=tag_manifest.digest, - manifest_bytes=tag_manifest.json_data, + internal_manifest_bytes=Bytes.for_string_or_unicode(tag_manifest.json_data), media_type=DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE, # Always in legacy. inputs=dict(legacy_image=legacy_image, tag_manifest=True)) @@ -195,7 +196,7 @@ class Manifest(datatype('Manifest', ['digest', 'media_type', 'manifest_bytes'])) return Manifest(db_id=manifest.id, digest=manifest.digest, - manifest_bytes=manifest.manifest_bytes, + internal_manifest_bytes=Bytes.for_string_or_unicode(manifest.manifest_bytes), media_type=manifest.media_type.name, inputs=dict(legacy_image=legacy_image, tag_manifest=False)) @@ -221,8 +222,8 @@ class Manifest(datatype('Manifest', ['digest', 'media_type', 'manifest_bytes'])) def get_parsed_manifest(self, validate=True): """ Returns the parsed manifest for this manifest. """ - validate = False # Temporarily disable. - return parse_manifest_from_bytes(self.manifest_bytes, self.media_type, validate=validate) + return parse_manifest_from_bytes(self.internal_manifest_bytes, self.media_type, + validate=validate) @property def layers_compressed_size(self): diff --git a/data/registry_model/test/test_interface.py b/data/registry_model/test/test_interface.py index bfb26daeb..c1787faf5 100644 --- a/data/registry_model/test/test_interface.py +++ b/data/registry_model/test/test_interface.py @@ -15,16 +15,20 @@ from data import model from data.database import (TagManifestLabelMap, TagManifestToManifest, Manifest, ManifestBlob, ManifestLegacyImage, ManifestLabel, TagManifest, RepositoryTag, Image, TagManifestLabel, TagManifest, TagManifestLabel, DerivedStorageForImage, - TorrentInfo, Tag, TagToRepositoryTag, close_db_filter) + TorrentInfo, Tag, TagToRepositoryTag, close_db_filter, + ImageStorageLocation) from data.cache.impl import InMemoryDataModelCache from data.registry_model.registry_pre_oci_model import PreOCIModel from data.registry_model.registry_oci_model import OCIModel from data.registry_model.datatypes import RepositoryReference from data.registry_model.blobuploader import upload_blob, BlobUploadSettings from data.registry_model.modelsplitter import SplitModel +from data.model.blob import store_blob_record_and_temp_link from image.docker.types import ManifestImageLayer -from image.docker.schema1 import DockerSchema1ManifestBuilder, DOCKER_SCHEMA1_CONTENT_TYPES +from image.docker.schema1 import (DockerSchema1ManifestBuilder, DOCKER_SCHEMA1_CONTENT_TYPES, + DockerSchema1Manifest) from image.docker.schema2.manifest import DockerSchema2ManifestBuilder +from util.bytes import Bytes from test.fixtures import * @@ -823,3 +827,40 @@ def test_create_manifest_and_retarget_tag_with_labels(registry_model): # Ensure the labels were applied. assert tag.lifetime_end_ms is not None + + + +def _populate_blob(digest): + location = ImageStorageLocation.get(name='local_us') + store_blob_record_and_temp_link('devtable', 'simple', digest, location, 1, 120) + + +def test_known_issue_schema1(registry_model): + test_dir = os.path.dirname(os.path.abspath(__file__)) + path = os.path.join(test_dir, '../../../image/docker/test/validate_manifest_known_issue.json') + with open(path, 'r') as f: + manifest_bytes = f.read() + + manifest = DockerSchema1Manifest(Bytes.for_string_or_unicode(manifest_bytes)) + + for blob_digest in manifest.local_blob_digests: + _populate_blob(blob_digest) + + digest = manifest.digest + assert digest == 'sha256:44518f5a4d1cb5b7a6347763116fb6e10f6a8563b6c40bb389a0a982f0a9f47a' + + # Create the manifest in the database. + repository_ref = registry_model.lookup_repository('devtable', 'simple') + created_manifest, _ = registry_model.create_manifest_and_retarget_tag(repository_ref, manifest, + 'latest', storage) + assert created_manifest + assert created_manifest.digest == manifest.digest + assert (created_manifest.internal_manifest_bytes.as_encoded_str() == + manifest.bytes.as_encoded_str()) + + # Look it up again and validate. + found = registry_model.lookup_manifest_by_digest(repository_ref, manifest.digest, allow_dead=True) + assert found + assert found.digest == digest + assert found.internal_manifest_bytes.as_encoded_str() == manifest.bytes.as_encoded_str() + assert found.get_parsed_manifest().digest == digest diff --git a/endpoints/api/manifest.py b/endpoints/api/manifest.py index 08dec2dc8..014f1f350 100644 --- a/endpoints/api/manifest.py +++ b/endpoints/api/manifest.py @@ -72,7 +72,7 @@ def _manifest_dict(manifest): return { 'digest': manifest.digest, 'is_manifest_list': manifest.is_manifest_list, - 'manifest_data': manifest.manifest_bytes, + 'manifest_data': manifest.internal_manifest_bytes.as_unicode(), 'image': image, 'layers': ([_layer_dict(lyr.layer_info, idx) for idx, lyr in enumerate(layers)] if layers else None), diff --git a/endpoints/api/tag.py b/endpoints/api/tag.py index 7b1f8c8cc..ed08b104c 100644 --- a/endpoints/api/tag.py +++ b/endpoints/api/tag.py @@ -39,7 +39,7 @@ def _tag_dict(tag): tag_info['manifest_digest'] = tag.manifest_digest if tag.manifest: try: - tag_info['manifest'] = json.loads(tag.manifest.manifest_bytes) + tag_info['manifest'] = json.loads(tag.manifest.internal_manifest_bytes.as_unicode()) except (TypeError, ValueError): pass diff --git a/endpoints/v2/manifest.py b/endpoints/v2/manifest.py index 98cdfdb63..58e012a44 100644 --- a/endpoints/v2/manifest.py +++ b/endpoints/v2/manifest.py @@ -20,6 +20,7 @@ from image.docker.schema2 import DOCKER_SCHEMA2_CONTENT_TYPES, OCI_CONTENT_TYPES from image.docker.schemas import parse_manifest_from_bytes from notifications import spawn_notification from util.audit import track_and_log +from util.bytes import Bytes from util.names import VALID_TAG_PATTERN from util.registry.replication import queue_replication_batch @@ -72,7 +73,7 @@ def fetch_manifest_by_tagname(namespace_name, repo_name, manifest_ref): metric_queue.repository_pull.Inc(labelvalues=[namespace_name, repo_name, 'v2', True]) return Response( - supported.bytes, + supported.bytes.as_unicode(), status=200, headers={ 'Content-Type': supported.media_type, @@ -109,7 +110,7 @@ def fetch_manifest_by_digest(namespace_name, repo_name, manifest_ref): track_and_log('pull_repo', repository_ref, manifest_digest=manifest_ref) metric_queue.repository_pull.Inc(labelvalues=[namespace_name, repo_name, 'v2', True]) - return Response(supported.bytes, status=200, headers={ + return Response(supported.bytes.as_unicode(), status=200, headers={ 'Content-Type': supported.media_type, 'Docker-Content-Digest': supported.digest, }) @@ -214,7 +215,7 @@ def _parse_manifest(): content_type = DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE try: - return parse_manifest_from_bytes(request.data, content_type) + return parse_manifest_from_bytes(Bytes.for_string_or_unicode(request.data), content_type) except ManifestException as me: logger.exception("failed to parse manifest when writing by tagname") raise ManifestInvalid(detail={'message': 'failed to parse manifest: %s' % me.message}) diff --git a/image/docker/schema1.py b/image/docker/schema1.py index 2e076e058..846b9e3aa 100644 --- a/image/docker/schema1.py +++ b/image/docker/schema1.py @@ -23,7 +23,8 @@ from image.docker import ManifestException from image.docker.types import ManifestImageLayer from image.docker.interfaces import ManifestInterface from image.docker.v1 import DockerV1Metadata -from image.docker.schemautil import ensure_utf8, to_canonical_json +from image.docker.schemautil import to_canonical_json +from util.bytes import Bytes logger = logging.getLogger(__name__) @@ -160,11 +161,13 @@ class DockerSchema1Manifest(ManifestInterface): } def __init__(self, manifest_bytes, validate=True): + assert isinstance(manifest_bytes, Bytes) + self._layers = None self._bytes = manifest_bytes try: - self._parsed = json.loads(manifest_bytes) + self._parsed = json.loads(manifest_bytes.as_encoded_str()) except ValueError as ve: raise MalformedSchema1Manifest('malformed manifest data: %s' % ve) @@ -193,13 +196,13 @@ class DockerSchema1Manifest(ManifestInterface): @classmethod def for_latin1_bytes(cls, encoded_bytes, validate=True): - return DockerSchema1Manifest(encoded_bytes.encode('utf-8'), validate) + return DockerSchema1Manifest(Bytes.for_string_or_unicode(encoded_bytes), validate) def _validate(self): if not self._signatures: return - payload_str = ensure_utf8(self._payload) + payload_str = self._payload for signature in self._signatures: bytes_to_verify = '{0}.{1}'.format(signature['protected'], base64url_encode(payload_str)) signer = SIGNER_ALGS[signature['header']['alg']] @@ -248,10 +251,6 @@ class DockerSchema1Manifest(ManifestInterface): def tag(self): return self._tag - @property - def json(self): - return self._bytes - @property def bytes(self): return self._bytes @@ -270,7 +269,7 @@ class DockerSchema1Manifest(ManifestInterface): @property def digest(self): - return digest_tools.sha256_digest(ensure_utf8(self._payload)) + return digest_tools.sha256_digest(self._payload) @property def image_ids(self): @@ -395,11 +394,12 @@ class DockerSchema1Manifest(ManifestInterface): @property def _payload(self): if self._signatures is None: - return self._bytes + return self._bytes.as_encoded_str() + byte_data = self._bytes.as_encoded_str() protected = str(self._signatures[0][DOCKER_SCHEMA1_PROTECTED_KEY]) parsed_protected = json.loads(base64url_decode(protected)) - signed_content_head = self._bytes[:parsed_protected[DOCKER_SCHEMA1_FORMAT_LENGTH_KEY]] + signed_content_head = byte_data[:parsed_protected[DOCKER_SCHEMA1_FORMAT_LENGTH_KEY]] signed_content_tail = base64url_decode(str(parsed_protected[DOCKER_SCHEMA1_FORMAT_TAIL_KEY])) return signed_content_head + signed_content_tail @@ -548,8 +548,9 @@ class DockerSchema1ManifestBuilder(object): payload_str = json.dumps(payload, indent=3, ensure_ascii=ensure_ascii) if json_web_key is None: - return DockerSchema1Manifest(payload_str) + return DockerSchema1Manifest(Bytes.for_string_or_unicode(payload_str)) + payload_str = Bytes.for_string_or_unicode(payload_str).as_encoded_str() split_point = payload_str.rfind('\n}') protected_payload = { @@ -560,7 +561,6 @@ class DockerSchema1ManifestBuilder(object): protected = base64url_encode(json.dumps(protected_payload, ensure_ascii=ensure_ascii)) logger.debug('Generated protected block: %s', protected) - payload_str = ensure_utf8(payload_str) bytes_to_sign = '{0}.{1}'.format(protected, base64url_encode(payload_str)) signer = SIGNER_ALGS[_JWS_SIGNING_ALGORITHM] @@ -579,7 +579,9 @@ class DockerSchema1ManifestBuilder(object): logger.debug('Encoded signature block: %s', json.dumps(signature_block)) payload.update({DOCKER_SCHEMA1_SIGNATURES_KEY: [signature_block]}) - return DockerSchema1Manifest(json.dumps(payload, indent=3, ensure_ascii=ensure_ascii)) + + json_str = json.dumps(payload, indent=3, ensure_ascii=ensure_ascii) + return DockerSchema1Manifest(Bytes.for_string_or_unicode(json_str)) def _updated_v1_metadata(v1_metadata_json, updated_id_map): diff --git a/image/docker/schema2/config.py b/image/docker/schema2/config.py index dc0ee475c..c5b14862f 100644 --- a/image/docker/schema2/config.py +++ b/image/docker/schema2/config.py @@ -102,7 +102,7 @@ from dateutil.parser import parse as parse_date from digest import digest_tools from image.docker import ManifestException -from image.docker.schemautil import ensure_utf8 +from util.bytes import Bytes DOCKER_SCHEMA2_CONFIG_HISTORY_KEY = "history" @@ -183,10 +183,12 @@ class DockerSchema2Config(object): } def __init__(self, config_bytes): + assert isinstance(config_bytes, Bytes) + self._config_bytes = config_bytes try: - self._parsed = json.loads(ensure_utf8(config_bytes)) + self._parsed = json.loads(config_bytes.as_unicode()) except ValueError as ve: raise MalformedSchema2Config('malformed config data: %s' % ve) @@ -198,12 +200,12 @@ class DockerSchema2Config(object): @property def digest(self): """ Returns the digest of this config object. """ - return digest_tools.sha256_digest(ensure_utf8(self._config_bytes)) + return digest_tools.sha256_digest(self._config_bytes.as_encoded_str()) @property def size(self): """ Returns the size of this config object. """ - return len(ensure_utf8(self._config_bytes)) + return len(self._config_bytes.as_encoded_str()) @property def bytes(self): diff --git a/image/docker/schema2/list.py b/image/docker/schema2/list.py index 233ce9f6e..195a3faba 100644 --- a/image/docker/schema2/list.py +++ b/image/docker/schema2/list.py @@ -12,7 +12,7 @@ from image.docker.schema1 import DockerSchema1Manifest from image.docker.schema2 import (DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE, DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE) from image.docker.schema2.manifest import DockerSchema2Manifest -from image.docker.schemautil import ensure_utf8 +from util.bytes import Bytes logger = logging.getLogger(__name__) @@ -67,10 +67,10 @@ class LazyManifestLoader(object): content_type = self._manifest_data[DOCKER_SCHEMA2_MANIFESTLIST_MEDIATYPE_KEY] if content_type == DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE: - return DockerSchema2Manifest(manifest_bytes) + return DockerSchema2Manifest(Bytes.for_string_or_unicode(manifest_bytes)) if content_type == DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE: - return DockerSchema1Manifest(manifest_bytes, validate=False) + return DockerSchema1Manifest(Bytes.for_string_or_unicode(manifest_bytes), validate=False) raise MalformedSchema2ManifestList('Unknown manifest content type') @@ -171,11 +171,13 @@ class DockerSchema2ManifestList(ManifestInterface): } def __init__(self, manifest_bytes): + assert isinstance(manifest_bytes, Bytes) + self._layers = None self._manifest_bytes = manifest_bytes try: - self._parsed = json.loads(ensure_utf8(manifest_bytes)) + self._parsed = json.loads(manifest_bytes.as_unicode()) except ValueError as ve: raise MalformedSchema2ManifestList('malformed manifest data: %s' % ve) @@ -196,7 +198,7 @@ class DockerSchema2ManifestList(ManifestInterface): @property def digest(self): """ The digest of the manifest, including type prefix. """ - return digest_tools.sha256_digest(ensure_utf8(self._manifest_bytes)) + return digest_tools.sha256_digest(self._manifest_bytes.as_encoded_str()) @property def media_type(self): @@ -319,7 +321,9 @@ class DockerSchema2ManifestListBuilder(object): def add_manifest(self, manifest, architecture, os): """ Adds a manifest to the list. """ manifest = manifest.unsigned() # Make sure we add the unsigned version to the list. - self.add_manifest_digest(manifest.digest, len(manifest.bytes), manifest.media_type, + self.add_manifest_digest(manifest.digest, + len(manifest.bytes.as_encoded_str()), + manifest.media_type, architecture, os) def add_manifest_digest(self, manifest_digest, manifest_size, media_type, architecture, os): @@ -345,4 +349,6 @@ class DockerSchema2ManifestListBuilder(object): } for manifest in self.manifests ], } - return DockerSchema2ManifestList(json.dumps(manifest_list_dict, indent=3)) + + json_str = Bytes.for_string_or_unicode(json.dumps(manifest_list_dict, indent=3)) + return DockerSchema2ManifestList(json_str) diff --git a/image/docker/schema2/manifest.py b/image/docker/schema2/manifest.py index 5731fa76d..d3a16d64f 100644 --- a/image/docker/schema2/manifest.py +++ b/image/docker/schema2/manifest.py @@ -16,7 +16,7 @@ from image.docker.schema2 import (DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE, EMPTY_LAYER_BLOB_DIGEST, EMPTY_LAYER_SIZE) from image.docker.schema1 import DockerSchema1ManifestBuilder from image.docker.schema2.config import DockerSchema2Config -from image.docker.schemautil import ensure_utf8 +from util.bytes import Bytes # Keys. DOCKER_SCHEMA2_MANIFEST_VERSION_KEY = 'schemaVersion' @@ -129,13 +129,15 @@ class DockerSchema2Manifest(ManifestInterface): } def __init__(self, manifest_bytes): + assert isinstance(manifest_bytes, Bytes) + self._payload = manifest_bytes self._filesystem_layers = None self._cached_built_config = None try: - self._parsed = json.loads(ensure_utf8(self._payload)) + self._parsed = json.loads(self._payload.as_unicode()) except ValueError as ve: raise MalformedSchema2Manifest('malformed manifest data: %s' % ve) @@ -166,7 +168,7 @@ class DockerSchema2Manifest(ManifestInterface): @property def digest(self): - return digest_tools.sha256_digest(ensure_utf8(self._payload)) + return digest_tools.sha256_digest(self._payload.as_encoded_str()) @property def config(self): @@ -365,7 +367,7 @@ class DockerSchema2Manifest(ManifestInterface): self.config.size) raise MalformedSchema2Manifest(msg) - self._cached_built_config = DockerSchema2Config(config_bytes) + self._cached_built_config = DockerSchema2Config(Bytes.for_string_or_unicode(config_bytes)) return self._cached_built_config def _generate_filesystem_layers(self): @@ -446,4 +448,6 @@ class DockerSchema2ManifestBuilder(object): _build_layer(layer) for layer in self.filesystem_layers ], } - return DockerSchema2Manifest(json.dumps(manifest_dict, ensure_ascii=ensure_ascii, indent=3)) + + json_str = json.dumps(manifest_dict, ensure_ascii=ensure_ascii, indent=3) + return DockerSchema2Manifest(Bytes.for_string_or_unicode(json_str)) diff --git a/image/docker/schema2/test/test_config.py b/image/docker/schema2/test/test_config.py index c0e86c05f..ba59feea3 100644 --- a/image/docker/schema2/test/test_config.py +++ b/image/docker/schema2/test/test_config.py @@ -2,6 +2,7 @@ import json import pytest from image.docker.schema2.config import MalformedSchema2Config, DockerSchema2Config +from util.bytes import Bytes @pytest.mark.parametrize('json_data', [ '', @@ -14,7 +15,7 @@ from image.docker.schema2.config import MalformedSchema2Config, DockerSchema2Con ]) def test_malformed_configs(json_data): with pytest.raises(MalformedSchema2Config): - DockerSchema2Config(json_data) + DockerSchema2Config(Bytes.for_string_or_unicode(json_data)) CONFIG_BYTES = json.dumps({ "architecture": "amd64", @@ -106,7 +107,7 @@ CONFIG_BYTES = json.dumps({ }) def test_valid_config(): - config = DockerSchema2Config(CONFIG_BYTES) + config = DockerSchema2Config(Bytes.for_string_or_unicode(CONFIG_BYTES)) history = list(config.history) assert len(history) == 4 diff --git a/image/docker/schema2/test/test_conversion.py b/image/docker/schema2/test/test_conversion.py index ef0bc359f..75a1bece9 100644 --- a/image/docker/schema2/test/test_conversion.py +++ b/image/docker/schema2/test/test_conversion.py @@ -6,12 +6,14 @@ import pytest from image.docker.schema1 import DockerSchema1Manifest, DOCKER_SCHEMA1_CONTENT_TYPES from image.docker.schema2.manifest import DockerSchema2Manifest from image.docker.schemautil import ContentRetrieverForTesting +from util.bytes import Bytes + def _get_test_file_contents(test_name, kind): filename = '%s.%s.json' % (test_name, kind) data_dir = os.path.dirname(__file__) with open(os.path.join(data_dir, 'conversion_data', filename), 'r') as f: - return f.read() + return Bytes.for_string_or_unicode(f.read()) @pytest.mark.parametrize('name, config_sha', [ @@ -21,7 +23,7 @@ def _get_test_file_contents(test_name, kind): ]) def test_legacy_layers(name, config_sha): cr = {} - cr[config_sha] = _get_test_file_contents(name, 'config') + cr[config_sha] = _get_test_file_contents(name, 'config').as_encoded_str() retriever = ContentRetrieverForTesting(cr) schema2 = DockerSchema2Manifest(_get_test_file_contents(name, 'schema2')) @@ -47,7 +49,7 @@ def test_legacy_layers(name, config_sha): ]) def test_conversion(name, config_sha): cr = {} - cr[config_sha] = _get_test_file_contents(name, 'config') + cr[config_sha] = _get_test_file_contents(name, 'config').as_encoded_str() retriever = ContentRetrieverForTesting(cr) schema2 = DockerSchema2Manifest(_get_test_file_contents(name, 'schema2')) @@ -77,7 +79,7 @@ def test_conversion(name, config_sha): ]) def test_2to1_conversion(name, config_sha): cr = {} - cr[config_sha] = _get_test_file_contents(name, 'config') + cr[config_sha] = _get_test_file_contents(name, 'config').as_encoded_str() retriever = ContentRetrieverForTesting(cr) schema2 = DockerSchema2Manifest(_get_test_file_contents(name, 'schema2')) diff --git a/image/docker/schema2/test/test_list.py b/image/docker/schema2/test/test_list.py index 2d2d1ef3f..f944c534b 100644 --- a/image/docker/schema2/test/test_list.py +++ b/image/docker/schema2/test/test_list.py @@ -9,6 +9,8 @@ from image.docker.schema2.list import (MalformedSchema2ManifestList, DockerSchem from image.docker.schema2.test.test_manifest import MANIFEST_BYTES as v22_bytes from image.docker.schemautil import ContentRetrieverForTesting from image.docker.test.test_schema1 import MANIFEST_BYTES as v21_bytes +from util.bytes import Bytes + @pytest.mark.parametrize('json_data', [ '', @@ -21,7 +23,7 @@ from image.docker.test.test_schema1 import MANIFEST_BYTES as v21_bytes ]) def test_malformed_manifest_lists(json_data): with pytest.raises(MalformedSchema2ManifestList): - DockerSchema2ManifestList(json_data) + DockerSchema2ManifestList(Bytes.for_string_or_unicode(json_data)) MANIFESTLIST_BYTES = json.dumps({ @@ -74,11 +76,11 @@ retriever = ContentRetrieverForTesting({ }) def test_valid_manifestlist(): - manifestlist = DockerSchema2ManifestList(MANIFESTLIST_BYTES) + manifestlist = DockerSchema2ManifestList(Bytes.for_string_or_unicode(MANIFESTLIST_BYTES)) assert len(manifestlist.manifests(retriever)) == 2 assert manifestlist.media_type == 'application/vnd.docker.distribution.manifest.list.v2+json' - assert manifestlist.bytes == MANIFESTLIST_BYTES + assert manifestlist.bytes.as_encoded_str() == MANIFESTLIST_BYTES assert manifestlist.manifest_dict == json.loads(MANIFESTLIST_BYTES) assert manifestlist.get_layers(retriever) is None assert not manifestlist.blob_digests @@ -108,18 +110,18 @@ def test_valid_manifestlist(): def test_get_schema1_manifest_no_matching_list(): - manifestlist = DockerSchema2ManifestList(NO_AMD_MANIFESTLIST_BYTES) + manifestlist = DockerSchema2ManifestList(Bytes.for_string_or_unicode(NO_AMD_MANIFESTLIST_BYTES)) assert len(manifestlist.manifests(retriever)) == 1 assert manifestlist.media_type == 'application/vnd.docker.distribution.manifest.list.v2+json' - assert manifestlist.bytes == NO_AMD_MANIFESTLIST_BYTES + assert manifestlist.bytes.as_encoded_str() == NO_AMD_MANIFESTLIST_BYTES compatible_manifest = manifestlist.get_schema1_manifest('foo', 'bar', 'baz', retriever) assert compatible_manifest is None def test_builder(): - existing = DockerSchema2ManifestList(MANIFESTLIST_BYTES) + existing = DockerSchema2ManifestList(Bytes.for_string_or_unicode(MANIFESTLIST_BYTES)) builder = DockerSchema2ManifestListBuilder() for index, manifest in enumerate(existing.manifests(retriever)): builder.add_manifest(manifest.manifest_obj, "amd64", "os") diff --git a/image/docker/schema2/test/test_manifest.py b/image/docker/schema2/test/test_manifest.py index f8bfea5aa..6d2193b6d 100644 --- a/image/docker/schema2/test/test_manifest.py +++ b/image/docker/schema2/test/test_manifest.py @@ -13,6 +13,7 @@ from image.docker.schema2.manifest import (MalformedSchema2Manifest, DockerSchem from image.docker.schema2.config import DockerSchema2Config from image.docker.schema2.test.test_config import CONFIG_BYTES from image.docker.schemautil import ContentRetrieverForTesting +from util.bytes import Bytes @pytest.mark.parametrize('json_data', [ @@ -26,7 +27,7 @@ from image.docker.schemautil import ContentRetrieverForTesting ]) def test_malformed_manifests(json_data): with pytest.raises(MalformedSchema2Manifest): - DockerSchema2Manifest(json_data) + DockerSchema2Manifest(Bytes.for_string_or_unicode(json_data)) MANIFEST_BYTES = json.dumps({ @@ -95,7 +96,7 @@ REMOTE_MANIFEST_BYTES = json.dumps({ }) def test_valid_manifest(): - manifest = DockerSchema2Manifest(MANIFEST_BYTES) + manifest = DockerSchema2Manifest(Bytes.for_string_or_unicode(MANIFEST_BYTES)) assert manifest.config.size == 1885 assert str(manifest.config.digest) == 'sha256:b5b2b2c507a0944348e0303114d8d93aaaa081732b86451d9bce1f432a537bc7' assert manifest.media_type == "application/vnd.docker.distribution.manifest.v2+json" @@ -148,7 +149,7 @@ def test_valid_manifest(): def test_valid_remote_manifest(): - manifest = DockerSchema2Manifest(REMOTE_MANIFEST_BYTES) + manifest = DockerSchema2Manifest(Bytes.for_string_or_unicode(REMOTE_MANIFEST_BYTES)) assert manifest.config.size == 1885 assert str(manifest.config.digest) == 'sha256:b5b2b2c507a0944348e0303114d8d93aaaa081732b86451d9bce1f432a537bc7' assert manifest.media_type == "application/vnd.docker.distribution.manifest.v2+json" @@ -209,7 +210,7 @@ def test_valid_remote_manifest(): def test_schema2_builder(): - manifest = DockerSchema2Manifest(MANIFEST_BYTES) + manifest = DockerSchema2Manifest(Bytes.for_string_or_unicode(MANIFEST_BYTES)) builder = DockerSchema2ManifestBuilder() builder.set_config_digest(manifest.config.digest, manifest.config.size) @@ -232,12 +233,12 @@ def test_get_manifest_labels(): "history": [], }, 'sha256:b5b2b2c507a0944348e0303114d8d93aaaa081732b86451d9bce1f432a537bc7', 1885) - manifest = DockerSchema2Manifest(MANIFEST_BYTES) + manifest = DockerSchema2Manifest(Bytes.for_string_or_unicode(MANIFEST_BYTES)) assert manifest.get_manifest_labels(retriever) == labels def test_build_schema1(): - manifest = DockerSchema2Manifest(MANIFEST_BYTES) + manifest = DockerSchema2Manifest(Bytes.for_string_or_unicode(MANIFEST_BYTES)) assert not manifest.has_remote_layer retriever = ContentRetrieverForTesting({ @@ -277,7 +278,7 @@ def test_get_schema1_manifest(): ], }, 'sha256:b5b2b2c507a0944348e0303114d8d93aaaa081732b86451d9bce1f432a537bc7', 1885) - manifest = DockerSchema2Manifest(MANIFEST_BYTES) + manifest = DockerSchema2Manifest(Bytes.for_string_or_unicode(MANIFEST_BYTES)) schema1 = manifest.get_schema1_manifest('somenamespace', 'somename', 'sometag', retriever) assert schema1 is not None assert schema1.media_type == DOCKER_SCHEMA1_MANIFEST_CONTENT_TYPE @@ -396,7 +397,7 @@ def test_build_unencoded_unicode_manifest(): ], }, ensure_ascii=False) - schema2_config = DockerSchema2Config(config_json) + schema2_config = DockerSchema2Config(Bytes.for_string_or_unicode(config_json)) builder = DockerSchema2ManifestBuilder() builder.set_config(schema2_config) @@ -414,7 +415,7 @@ def test_load_unicode_manifest(): with open(os.path.join(test_dir, 'unicode_manifest.json'), 'r') as f: manifest_bytes = f.read() - manifest = DockerSchema2Manifest(manifest_bytes) + manifest = DockerSchema2Manifest(Bytes.for_string_or_unicode(manifest_bytes)) assert manifest.digest == 'sha256:97556fa8c553395bd9d8e19a04acef4716ca287ffbf6bde14dd9966053912613' layers = list(manifest.get_layers(retriever)) diff --git a/image/docker/schemas.py b/image/docker/schemas.py index f3d48d676..ab0d952f9 100644 --- a/image/docker/schemas.py +++ b/image/docker/schemas.py @@ -4,18 +4,14 @@ from image.docker.schema2 import (DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE, DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE) from image.docker.schema2.manifest import DockerSchema2Manifest from image.docker.schema2.list import DockerSchema2ManifestList +from util.bytes import Bytes def parse_manifest_from_bytes(manifest_bytes, media_type, validate=True): """ Parses and returns a manifest from the given bytes, for the given media type. Raises a ManifestException if the parse fails for some reason. """ - # NOTE: Docker sometimes pushed manifests encoded as utf-8, so decode them - # if we can. Otherwise, treat the string as already unicode encoded. - try: - manifest_bytes = manifest_bytes.decode('utf-8') - except: - pass + assert isinstance(manifest_bytes, Bytes) if media_type == DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE: return DockerSchema2Manifest(manifest_bytes) diff --git a/image/docker/schemautil.py b/image/docker/schemautil.py index 1840a6dba..adfa021c7 100644 --- a/image/docker/schemautil.py +++ b/image/docker/schemautil.py @@ -24,14 +24,6 @@ class ContentRetrieverForTesting(ContentRetriever): return ContentRetrieverForTesting(digests) -def ensure_utf8(unicode_or_str): - """ Ensures the given string is a utf-8 encoded str and not a unicode type. """ - if isinstance(unicode_or_str, unicode): - return unicode_or_str.encode('utf-8') - - return unicode_or_str - - class _CustomEncoder(json.JSONEncoder): def encode(self, o): encoded = super(_CustomEncoder, self).encode(o) diff --git a/image/docker/test/test_schema1.py b/image/docker/test/test_schema1.py index f2f9cfcf5..2c290e467 100644 --- a/image/docker/test/test_schema1.py +++ b/image/docker/test/test_schema1.py @@ -8,6 +8,8 @@ import pytest from app import docker_v2_signing_key from image.docker.schema1 import (MalformedSchema1Manifest, DockerSchema1Manifest, DockerSchema1ManifestBuilder) +from util.bytes import Bytes + @pytest.mark.parametrize('json_data', [ '', @@ -20,7 +22,7 @@ from image.docker.schema1 import (MalformedSchema1Manifest, DockerSchema1Manifes ]) def test_malformed_manifests(json_data): with pytest.raises(MalformedSchema1Manifest): - DockerSchema1Manifest(json_data) + DockerSchema1Manifest(Bytes.for_string_or_unicode(json_data)) MANIFEST_BYTES = json.dumps({ @@ -64,7 +66,7 @@ MANIFEST_BYTES = json.dumps({ def test_valid_manifest(): - manifest = DockerSchema1Manifest(MANIFEST_BYTES, validate=False) + manifest = DockerSchema1Manifest(Bytes.for_string_or_unicode(MANIFEST_BYTES), validate=False) assert len(manifest.signatures) == 1 assert manifest.namespace == '' assert manifest.repo_name == 'hello-world' @@ -107,7 +109,7 @@ def test_validate_manifest(): with open(os.path.join(test_dir, 'validated_manifest.json'), 'r') as f: manifest_bytes = f.read() - manifest = DockerSchema1Manifest(manifest_bytes, validate=True) + manifest = DockerSchema1Manifest(Bytes.for_string_or_unicode(manifest_bytes), validate=True) digest = manifest.digest assert digest == 'sha256:b5dc4f63fdbd64f34f2314c0747ef81008f9fcddce4edfc3fd0e8ec8b358d571' assert manifest.created_datetime @@ -118,7 +120,7 @@ def test_validate_manifest_with_unicode(): with open(os.path.join(test_dir, 'validated_manifest_with_unicode.json'), 'r') as f: manifest_bytes = f.read() - manifest = DockerSchema1Manifest(manifest_bytes, validate=True) + manifest = DockerSchema1Manifest(Bytes.for_string_or_unicode(manifest_bytes), validate=True) digest = manifest.digest assert digest == 'sha256:815ecf45716a96b19d54d911e6ace91f78bab26ca0dd299645d9995dacd9f1ef' assert manifest.created_datetime @@ -140,7 +142,7 @@ def test_validate_manifest_with_unencoded_unicode(): with open(os.path.join(test_dir, 'manifest_unencoded_unicode.json'), 'r') as f: manifest_bytes = f.read() - manifest = DockerSchema1Manifest(manifest_bytes) + manifest = DockerSchema1Manifest(Bytes.for_string_or_unicode(manifest_bytes)) digest = manifest.digest assert digest == 'sha256:5d8a0f34744a39bf566ba430251adc0cc86587f86aed3ac2acfb897f349777bc' assert manifest.created_datetime @@ -162,3 +164,17 @@ def test_build_unencoded_unicode_manifest(with_key): built = builder.build(with_key, ensure_ascii=False) built._validate() + + +def test_validate_manifest_known_issue(): + test_dir = os.path.dirname(os.path.abspath(__file__)) + with open(os.path.join(test_dir, 'validate_manifest_known_issue.json'), 'r') as f: + manifest_bytes = f.read() + + manifest = DockerSchema1Manifest(Bytes.for_string_or_unicode(manifest_bytes)) + digest = manifest.digest + assert digest == 'sha256:44518f5a4d1cb5b7a6347763116fb6e10f6a8563b6c40bb389a0a982f0a9f47a' + assert manifest.created_datetime + + layers = list(manifest.get_layers(None)) + assert layers[-1].author is None diff --git a/image/docker/test/test_schemas.py b/image/docker/test/test_schemas.py index 143323fd1..def881984 100644 --- a/image/docker/test/test_schemas.py +++ b/image/docker/test/test_schemas.py @@ -7,6 +7,7 @@ from image.docker.schema2 import DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE from image.docker.test.test_schema1 import MANIFEST_BYTES as SCHEMA1_BYTES from image.docker.schema2.test.test_list import MANIFESTLIST_BYTES from image.docker.schema2.test.test_manifest import MANIFEST_BYTES as SCHEMA2_BYTES +from util.bytes import Bytes @pytest.mark.parametrize('media_type, manifest_bytes', [ @@ -15,4 +16,5 @@ from image.docker.schema2.test.test_manifest import MANIFEST_BYTES as SCHEMA2_BY (DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE, MANIFESTLIST_BYTES), ]) def test_parse_manifest_from_bytes(media_type, manifest_bytes): - assert parse_manifest_from_bytes(manifest_bytes, media_type, validate=False) + assert parse_manifest_from_bytes(Bytes.for_string_or_unicode(manifest_bytes), media_type, + validate=False) diff --git a/image/docker/test/validate_manifest_known_issue.json b/image/docker/test/validate_manifest_known_issue.json new file mode 100644 index 000000000..a54e99b61 --- /dev/null +++ b/image/docker/test/validate_manifest_known_issue.json @@ -0,0 +1,56 @@ +{ + "schemaVersion": 1, + "name": "quaymonitor/monitortest2", + "tag": "latest", + "architecture": "x86_64", + "fsLayers": [ + { + "blobSum": "sha256:a3ed95caeb02ffe68cdd9fd84406680ae93d633cb16422d00e8a7c22955b46d4" + }, + { + "blobSum": "sha256:184dc3db39b5e19dc39547f43db46ea48cd6cc779e806a3c8a5e5396acd20206" + }, + { + "blobSum": "sha256:db80bcab0e8b69656505332fcdff3ef2b9f664a2029d1b2f97224cffcf689afc" + }, + { + "blobSum": "sha256:184dc3db39b5e19dc39547f43db46ea48cd6cc779e806a3c8a5e5396acd20206" + }, + { + "blobSum": "sha256:f0a98344d604e54694fc6118cf7a0cbd10dc7b2e9be8607ba8c5bfd7ba3c1067" + } + ], + "history": [ + { + "v1Compatibility": "{\"architecture\":\"x86_64\",\"config\":{\"Hostname\":\"4c9181ab6b87\",\"Domainname\":\"\",\"User\":\"\",\"AttachStdin\":false,\"AttachStdout\":false,\"AttachStderr\":false,\"Tty\":false,\"OpenStdin\":false,\"StdinOnce\":false,\"Env\":[\"HOME=/\",\"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\"],\"Cmd\":[\"sh\",\"echo\",\"\\\"2019-01-08 19:13:20 +0000\\\" \\u003e foo\"],\"Image\":\"quay.io/quay/busybox\",\"Volumes\":null,\"WorkingDir\":\"\",\"Entrypoint\":null,\"OnBuild\":null,\"Labels\":{}},\"container\":\"4c9181ab6b87fe75b5c0955c6c78983dec337914b05e65fb0073cce0ad076106\",\"container_config\":{\"Hostname\":\"4c9181ab6b87\",\"Domainname\":\"\",\"User\":\"\",\"AttachStdin\":false,\"AttachStdout\":false,\"AttachStderr\":false,\"Tty\":false,\"OpenStdin\":false,\"StdinOnce\":false,\"Env\":[\"HOME=/\",\"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\"],\"Cmd\":[\"sh\",\"echo\",\"\\\"2019-01-08 19:13:20 +0000\\\" \\u003e foo\"],\"Image\":\"quay.io/quay/busybox\",\"Volumes\":null,\"WorkingDir\":\"\",\"Entrypoint\":null,\"OnBuild\":null,\"Labels\":{}},\"created\":\"2019-01-08T19:13:20.674196032Z\",\"docker_version\":\"18.06.1-ce\",\"id\":\"7da7c4e4bcb121915fb33eb5c76ffef194cdcc14608010692cfce5734bd84751\",\"os\":\"linux\",\"parent\":\"ec75e623647b299585bdb0991293bd446e5545e9a4dabf9d37922d5671d9d860\",\"throwaway\":true}" + }, + { + "v1Compatibility": "{\"id\":\"ec75e623647b299585bdb0991293bd446e5545e9a4dabf9d37922d5671d9d860\",\"parent\":\"f32bc6daa02c76f0b1773688684bf3bee719a69db06192432e6c28a238f4cf4a\",\"created\":\"2014-02-03T15:58:08.872585903Z\",\"container_config\":{\"Cmd\":[\"/bin/sh -c #(nop) CMD [/bin/sh -c /bin/sh]\"]},\"author\":\"Jérôme Petazzoni \\u003cjerome@docker.com\\u003e\"}" + }, + { + "v1Compatibility": "{\"id\":\"f32bc6daa02c76f0b1773688684bf3bee719a69db06192432e6c28a238f4cf4a\",\"parent\":\"02feaf4fdc57dba2b142dae9d8dd0c90e710be710bea25ce63269e65d8f32872\",\"created\":\"2014-02-03T15:58:08.72383042Z\",\"container_config\":{\"Cmd\":[\"/bin/sh -c #(nop) ADD rootfs.tar in /\"]},\"author\":\"Jérôme Petazzoni \\u003cjerome@docker.com\\u003e\"}" + }, + { + "v1Compatibility": "{\"id\":\"02feaf4fdc57dba2b142dae9d8dd0c90e710be710bea25ce63269e65d8f32872\",\"parent\":\"f9a6e54178f312aa3686d7305b970e7d908d58b32e3f4554731b647e07b48fd2\",\"created\":\"2014-02-03T15:58:08.52236968Z\",\"container_config\":{\"Cmd\":[\"/bin/sh -c #(nop) MAINTAINER Jérôme Petazzoni \\u003cjerome@docker.com\\u003e\"]},\"author\":\"Jérôme Petazzoni \\u003cjerome@docker.com\\u003e\"}" + }, + { + "v1Compatibility": "{\"id\":\"f9a6e54178f312aa3686d7305b970e7d908d58b32e3f4554731b647e07b48fd2\",\"comment\":\"Imported from -\",\"created\":\"2013-06-13T14:03:50.821769-07:00\",\"container_config\":{\"Cmd\":[\"\"]}}" + } + ], + "signatures": [ + { + "header": { + "jwk": { + "crv": "P-256", + "kid": "XPAM:RVQE:4LWW:ABXI:QLLK:O2LK:XJ4V:UAOJ:WM24:ZG6J:UIJ3:JAYM", + "kty": "EC", + "x": "ijnW3d93SINE1y3GjNsCMYghAb7NT21vSiYK8pWdBkM", + "y": "7t-mGjoYOhEIGVaCSEclLLkMgHz2S9WXkReZJEBx-_U" + }, + "alg": "ES256" + }, + "signature": "N9m-NNL8CdGwxEHHHaJDhbT5_FFKBSdyy-7lP4jnWG3AQmOWbPEXTFANTeH2CNPvAbaM9ZqQm0dQFQVnOe5GNQ", + "protected": "eyJmb3JtYXRMZW5ndGgiOjM1OTgsImZvcm1hdFRhaWwiOiJDbjAiLCJ0aW1lIjoiMjAxOS0wMS0wOFQxOToxMzoyM1oifQ" + } + ] +} \ No newline at end of file diff --git a/test/registry/protocol_v2.py b/test/registry/protocol_v2.py index c0176f5e4..2b69c663a 100644 --- a/test/registry/protocol_v2.py +++ b/test/registry/protocol_v2.py @@ -12,6 +12,7 @@ from image.docker.schema2.list import DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE from image.docker.schemas import parse_manifest_from_bytes from test.registry.protocols import (RegistryProtocol, Failures, ProtocolOptions, PushResult, PullResult) +from util.bytes import Bytes @unique @@ -168,7 +169,8 @@ class V2Protocol(RegistryProtocol): # Parse the returned manifest list and ensure it matches. assert response.headers['Content-Type'] == DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE - retrieved = parse_manifest_from_bytes(response.text, response.headers['Content-Type']) + retrieved = parse_manifest_from_bytes(Bytes.for_string_or_unicode(response.text), + response.headers['Content-Type']) assert retrieved.schema_version == 2 assert retrieved.is_manifest_list assert retrieved.digest == manifestlist.digest @@ -184,7 +186,8 @@ class V2Protocol(RegistryProtocol): if expected_failure is not None: return None - manifest = parse_manifest_from_bytes(response.text, response.headers['Content-Type']) + manifest = parse_manifest_from_bytes(Bytes.for_string_or_unicode(response.text), + response.headers['Content-Type']) assert not manifest.is_manifest_list assert manifest.digest == manifest_digest @@ -221,7 +224,7 @@ class V2Protocol(RegistryProtocol): self.conduct(session, 'PUT', '/v2/%s/manifests/%s' % (self.repo_name(namespace, repo_name), manifest.digest), - data=manifest.bytes, + data=manifest.bytes.as_encoded_str(), expected_status=(202, expected_failure, V2ProtocolSteps.PUT_MANIFEST), headers=manifest_headers) @@ -235,7 +238,7 @@ class V2Protocol(RegistryProtocol): self.conduct(session, 'PUT', '/v2/%s/manifests/%s' % (self.repo_name(namespace, repo_name), tag_name), - data=manifestlist.bytes, + data=manifestlist.bytes.as_encoded_str(), expected_status=(202, expected_failure, V2ProtocolSteps.PUT_MANIFEST_LIST), headers=manifest_headers) @@ -282,10 +285,10 @@ class V2Protocol(RegistryProtocol): config['config'] = images[-1].config config_json = json.dumps(config, ensure_ascii=options.ensure_ascii) - schema2_config = DockerSchema2Config(config_json) + schema2_config = DockerSchema2Config(Bytes.for_string_or_unicode(config_json)) builder.set_config(schema2_config) - blobs[schema2_config.digest] = schema2_config.bytes.encode('utf-8') + blobs[schema2_config.digest] = schema2_config.bytes.as_encoded_str() return builder.build(ensure_ascii=options.ensure_ascii) def build_schema1(self, namespace, repo_name, tag_name, images, blobs, options): @@ -372,7 +375,7 @@ class V2Protocol(RegistryProtocol): tag_or_digest = tag_name if not options.push_by_manifest_digest else manifest.digest self.conduct(session, 'PUT', '/v2/%s/manifests/%s' % (self.repo_name(namespace, repo_name), tag_or_digest), - data=manifest.bytes.encode('utf-8'), + data=manifest.bytes.as_encoded_str(), expected_status=(put_code, expected_failure, V2ProtocolSteps.PUT_MANIFEST), headers=manifest_headers) @@ -546,7 +549,8 @@ class V2Protocol(RegistryProtocol): if not self.schema2: assert response.headers['Content-Type'] in DOCKER_SCHEMA1_CONTENT_TYPES - manifest = parse_manifest_from_bytes(response.text, response.headers['Content-Type']) + manifest = parse_manifest_from_bytes(Bytes.for_string_or_unicode(response.text), + response.headers['Content-Type']) manifests[tag_name] = manifest if manifest.schema_version == 1: diff --git a/test/registry_tests.py b/test/registry_tests.py index b14b4b8e4..05de88e90 100644 --- a/test/registry_tests.py +++ b/test/registry_tests.py @@ -682,7 +682,7 @@ class V2RegistryPushMixin(V2RegistryMixin): # a 202 response for success. put_code = 400 if invalid else 202 self.conduct('PUT', '/v2/%s/manifests/%s' % (repo_name, tag_name), - data=manifest.bytes, expected_code=put_code, + data=manifest.bytes.as_encoded_str(), expected_code=put_code, headers={'Content-Type': 'application/json'}, auth='jwt') return checksums, manifests @@ -1628,7 +1628,7 @@ class V2RegistryTests(V2RegistryPullMixin, V2RegistryPushMixin, RegistryTestsMix manifest = builder.build(_JWK) self.conduct('PUT', '/v2/%s/manifests/%s' % (repo_name, tag_name), - data=manifest.bytes, expected_code=415, + data=manifest.bytes.as_encoded_str(), expected_code=415, headers={'Content-Type': 'application/vnd.docker.distribution.manifest.v2+json'}, auth='jwt') @@ -1662,7 +1662,7 @@ class V2RegistryTests(V2RegistryPullMixin, V2RegistryPushMixin, RegistryTestsMix manifest = builder.build(_JWK) self.conduct('PUT', '/v2/%s/manifests/%s' % (repo_name, tag_name), - data=manifest.bytes, expected_code=415, + data=manifest.bytes.as_encoded_str(), expected_code=415, headers={'Content-Type': 'application/vnd.oci.image.manifest.v1+json'}, auth='jwt') @@ -1682,7 +1682,7 @@ class V2RegistryTests(V2RegistryPullMixin, V2RegistryPushMixin, RegistryTestsMix manifest = builder.build(_JWK) response = self.conduct('PUT', '/v2/%s/manifests/%s' % (repo_name, tag_name), - data=manifest.bytes, expected_code=400, + data=manifest.bytes.as_encoded_str(), expected_code=400, headers={'Content-Type': 'application/json'}, auth='jwt') self.assertEquals('MANIFEST_INVALID', response.json()['errors'][0]['code']) diff --git a/util/bytes.py b/util/bytes.py new file mode 100644 index 000000000..edf5531d7 --- /dev/null +++ b/util/bytes.py @@ -0,0 +1,32 @@ +class Bytes(object): + """ Wrapper around strings and unicode objects to ensure we are always using + the correct encoded or decoded data. + """ + def __init__(self, data): + assert isinstance(data, str) + self._encoded_data = data + + @classmethod + def for_string_or_unicode(cls, input): + # If the string is a unicode string, then encode its data as UTF-8. Note that + # we don't catch any decode exceptions here, as we want those to be raised. + if isinstance(input, unicode): + return Bytes(input.encode('utf-8')) + + # Next, try decoding as UTF-8. If we have a utf-8 encoded string, then we have no + # additional conversion to do. + try: + input.decode('utf-8') + return Bytes(input) + except UnicodeDecodeError: + pass + + # Finally, if the data is (somehow) a unicode string inside a `str` type, then + # re-encoded the data. + return Bytes(input.encode('utf-8')) + + def as_encoded_str(self): + return self._encoded_data + + def as_unicode(self): + return self._encoded_data.decode('utf-8') diff --git a/workers/tagbackfillworker.py b/workers/tagbackfillworker.py index ed3bceb23..e949b1dc1 100644 --- a/workers/tagbackfillworker.py +++ b/workers/tagbackfillworker.py @@ -19,6 +19,7 @@ from image.docker.schema1 import (DockerSchema1Manifest, ManifestException, Mani DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE) from workers.worker import Worker +from util.bytes import Bytes from util.log import logfile_path from util.migrate.allocator import yield_random_entries @@ -33,7 +34,7 @@ class BrokenManifest(ManifestInterface): """ def __init__(self, digest, payload): self._digest = digest - self._payload = payload + self._payload = Bytes.for_string_or_unicode(payload) @property def digest(self): diff --git a/workers/test/test_tagbackfillworker.py b/workers/test/test_tagbackfillworker.py index 0dd2d2e70..1a14a0f30 100644 --- a/workers/test/test_tagbackfillworker.py +++ b/workers/test/test_tagbackfillworker.py @@ -142,7 +142,8 @@ def test_manifestbackfillworker_mislinked_manifest(clear_rows, initialized_db): builder.add_layer(tag_v30.image.storage.content_checksum, '{"id": "foo"}') manifest = builder.build(docker_v2_signing_key) - mislinked_manifest = TagManifest.create(json_data=manifest.bytes, digest=manifest.digest, + mislinked_manifest = TagManifest.create(json_data=manifest.bytes.as_encoded_str(), + digest=manifest.digest, tag=tag_v50) # Backfill the manifest and ensure its proper content checksum was linked. @@ -176,7 +177,8 @@ def test_manifestbackfillworker_mislinked_invalid_manifest(clear_rows, initializ builder.add_layer('sha256:deadbeef', '{"id": "foo"}') manifest = builder.build(docker_v2_signing_key) - broken_manifest = TagManifest.create(json_data=manifest.bytes, digest=manifest.digest, + broken_manifest = TagManifest.create(json_data=manifest.bytes.as_encoded_str(), + digest=manifest.digest, tag=tag_v50) # Backfill the manifest and ensure it is marked as broken. @@ -208,9 +210,9 @@ def test_manifestbackfillworker_repeat_digest(clear_rows, initialized_db): builder.add_layer('sha256:deadbeef', '{"id": "foo"}') manifest = builder.build(docker_v2_signing_key) - manifest_1 = TagManifest.create(json_data=manifest.bytes, digest=manifest.digest, + manifest_1 = TagManifest.create(json_data=manifest.bytes.as_encoded_str(), digest=manifest.digest, tag=tag_v30) - manifest_2 = TagManifest.create(json_data=manifest.bytes, digest=manifest.digest, + manifest_2 = TagManifest.create(json_data=manifest.bytes.as_encoded_str(), digest=manifest.digest, tag=tag_v50) # Backfill "both" manifests and ensure both are pointed to by a single resulting row.