Further fixes for unicode handling in manifests

We were occasionally trying to compute schema 2 version 1 signatures on the *unicode* representation, which was failing the signature check. This PR adds a new wrapper type called `Bytes`, which all manifests must take in, and which handles the unicodes vs encoded utf-8 stuff in a central location. This PR also adds a test for the manifest that was breaking in production.
This commit is contained in:
Joseph Schorr 2019-01-08 20:49:00 -05:00
parent 05fa2bcbe0
commit 171c7e5238
28 changed files with 275 additions and 106 deletions

View file

@ -11,6 +11,7 @@ from image.docker import ManifestException
from image.docker.schemas import parse_manifest_from_bytes
from image.docker.schema1 import DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE
from image.docker.schema2 import DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE
from util.bytes import Bytes
class RepositoryReference(datatype('Repository', [])):
@ -176,7 +177,7 @@ class Tag(datatype('Tag', ['name', 'reversion', 'manifest_digest', 'lifetime_sta
return self._db_id
class Manifest(datatype('Manifest', ['digest', 'media_type', 'manifest_bytes'])):
class Manifest(datatype('Manifest', ['digest', 'media_type', 'internal_manifest_bytes'])):
""" Manifest represents a manifest in a repository. """
@classmethod
def for_tag_manifest(cls, tag_manifest, legacy_image=None):
@ -184,7 +185,7 @@ class Manifest(datatype('Manifest', ['digest', 'media_type', 'manifest_bytes']))
return None
return Manifest(db_id=tag_manifest.id, digest=tag_manifest.digest,
manifest_bytes=tag_manifest.json_data,
internal_manifest_bytes=Bytes.for_string_or_unicode(tag_manifest.json_data),
media_type=DOCKER_SCHEMA1_SIGNED_MANIFEST_CONTENT_TYPE, # Always in legacy.
inputs=dict(legacy_image=legacy_image, tag_manifest=True))
@ -195,7 +196,7 @@ class Manifest(datatype('Manifest', ['digest', 'media_type', 'manifest_bytes']))
return Manifest(db_id=manifest.id,
digest=manifest.digest,
manifest_bytes=manifest.manifest_bytes,
internal_manifest_bytes=Bytes.for_string_or_unicode(manifest.manifest_bytes),
media_type=manifest.media_type.name,
inputs=dict(legacy_image=legacy_image, tag_manifest=False))
@ -221,8 +222,8 @@ class Manifest(datatype('Manifest', ['digest', 'media_type', 'manifest_bytes']))
def get_parsed_manifest(self, validate=True):
""" Returns the parsed manifest for this manifest. """
validate = False # Temporarily disable.
return parse_manifest_from_bytes(self.manifest_bytes, self.media_type, validate=validate)
return parse_manifest_from_bytes(self.internal_manifest_bytes, self.media_type,
validate=validate)
@property
def layers_compressed_size(self):

View file

@ -15,16 +15,20 @@ from data import model
from data.database import (TagManifestLabelMap, TagManifestToManifest, Manifest, ManifestBlob,
ManifestLegacyImage, ManifestLabel, TagManifest, RepositoryTag, Image,
TagManifestLabel, TagManifest, TagManifestLabel, DerivedStorageForImage,
TorrentInfo, Tag, TagToRepositoryTag, close_db_filter)
TorrentInfo, Tag, TagToRepositoryTag, close_db_filter,
ImageStorageLocation)
from data.cache.impl import InMemoryDataModelCache
from data.registry_model.registry_pre_oci_model import PreOCIModel
from data.registry_model.registry_oci_model import OCIModel
from data.registry_model.datatypes import RepositoryReference
from data.registry_model.blobuploader import upload_blob, BlobUploadSettings
from data.registry_model.modelsplitter import SplitModel
from data.model.blob import store_blob_record_and_temp_link
from image.docker.types import ManifestImageLayer
from image.docker.schema1 import DockerSchema1ManifestBuilder, DOCKER_SCHEMA1_CONTENT_TYPES
from image.docker.schema1 import (DockerSchema1ManifestBuilder, DOCKER_SCHEMA1_CONTENT_TYPES,
DockerSchema1Manifest)
from image.docker.schema2.manifest import DockerSchema2ManifestBuilder
from util.bytes import Bytes
from test.fixtures import *
@ -823,3 +827,40 @@ def test_create_manifest_and_retarget_tag_with_labels(registry_model):
# Ensure the labels were applied.
assert tag.lifetime_end_ms is not None
def _populate_blob(digest):
location = ImageStorageLocation.get(name='local_us')
store_blob_record_and_temp_link('devtable', 'simple', digest, location, 1, 120)
def test_known_issue_schema1(registry_model):
test_dir = os.path.dirname(os.path.abspath(__file__))
path = os.path.join(test_dir, '../../../image/docker/test/validate_manifest_known_issue.json')
with open(path, 'r') as f:
manifest_bytes = f.read()
manifest = DockerSchema1Manifest(Bytes.for_string_or_unicode(manifest_bytes))
for blob_digest in manifest.local_blob_digests:
_populate_blob(blob_digest)
digest = manifest.digest
assert digest == 'sha256:44518f5a4d1cb5b7a6347763116fb6e10f6a8563b6c40bb389a0a982f0a9f47a'
# Create the manifest in the database.
repository_ref = registry_model.lookup_repository('devtable', 'simple')
created_manifest, _ = registry_model.create_manifest_and_retarget_tag(repository_ref, manifest,
'latest', storage)
assert created_manifest
assert created_manifest.digest == manifest.digest
assert (created_manifest.internal_manifest_bytes.as_encoded_str() ==
manifest.bytes.as_encoded_str())
# Look it up again and validate.
found = registry_model.lookup_manifest_by_digest(repository_ref, manifest.digest, allow_dead=True)
assert found
assert found.digest == digest
assert found.internal_manifest_bytes.as_encoded_str() == manifest.bytes.as_encoded_str()
assert found.get_parsed_manifest().digest == digest