Properly handle the empty layer when pushing schema 2 manifests

Docker doesn't send us the contents of this layer, so we are forced to synthesize it ourselves.
This commit is contained in:
Joseph Schorr 2018-11-25 16:16:59 +02:00
parent 947c029afa
commit 4985040d31
13 changed files with 173 additions and 25 deletions

View file

@ -1,3 +1,5 @@
import logging
from datetime import datetime
from uuid import uuid4
@ -7,6 +9,9 @@ from data.database import (Repository, Namespace, ImageStorage, Image, ImageStor
BlobUpload, ImageStorageLocation, db_random_func)
logger = logging.getLogger(__name__)
def get_repository_blob_by_digest(repository, blob_digest):
""" Find the content-addressable blob linked to the specified repository.
"""
@ -157,3 +162,31 @@ def initiate_upload(namespace, repo_name, uuid, location_name, storage_metadata)
location = storage_model.get_image_location_for_name(location_name)
return BlobUpload.create(repository=repo, location=location.id, uuid=uuid,
storage_metadata=storage_metadata)
def get_or_create_shared_blob(digest, byte_data, storage):
    """ Returns the ImageStorage blob with the given digest or, if not present,
        adds a row and writes the given byte data to the storage engine.

        This method is *only* to be used for shared blobs that are globally
        accessible, such as the special empty gzipped tar layer that Docker
        no longer pushes to us.

        `digest` is the content checksum used both to look up an existing row and to
        populate the new one. `byte_data` is the raw content written to storage; its
        length is recorded as the image size. `storage` is the storage engine; the
        bytes are written to the first entry of its `preferred_locations`.

        Any exception raised while resolving the location, writing the bytes or
        updating the database is propagated to the caller. If the failure happens
        after the row was created, the row is deleted before re-raising.
    """
    try:
        return ImageStorage.get(content_checksum=digest, uploading=False)
    except ImageStorage.DoesNotExist:
        pass

    # Resolve the target location *before* creating the row. If there is no preferred
    # location or its name is unknown, we fail here without leaving behind an orphaned
    # row that is permanently marked as uploading.
    preferred = storage.preferred_locations[0]
    location_obj = ImageStorageLocation.get(name=preferred)

    # The row starts out as `uploading` and is only flipped once the bytes and the
    # placement have both been written.
    record = ImageStorage.create(image_size=len(byte_data), content_checksum=digest,
                                 cas_path=True, uploading=True)

    try:
        storage.put_content([preferred], storage_model.get_layer_path(record), byte_data)
        ImageStoragePlacement.create(storage=record, location=location_obj)

        record.uploading = False
        record.save()
    except:
        # Deliberately bare: the row must be removed on *any* failure, and the original
        # error is always re-raised.
        logger.exception('Exception when trying to write special layer %s', digest)
        record.delete_instance()
        raise

    return record

View file

@ -7,11 +7,13 @@ from peewee import IntegrityError, JOIN
from data.database import (Tag, Manifest, ManifestBlob, ManifestLegacyImage, ManifestChild,
db_transaction)
from data.model import BlobDoesNotExist
from data.model.blob import get_or_create_shared_blob
from data.model.oci.tag import filter_to_alive_tags
from data.model.oci.label import create_manifest_label
from data.model.oci.retriever import RepositoryContentRetriever
from data.model.storage import lookup_repo_storages_by_content_checksum
from data.model.image import lookup_repository_images, get_image, synthesize_v1_image
from image.docker.schema2 import EMPTY_LAYER_BLOB_DIGEST, EMPTY_LAYER_BYTES
from image.docker.schema1 import ManifestException
from image.docker.schema2.list import MalformedSchema2ManifestList
from util.validation import is_json
@ -121,6 +123,15 @@ def _create_manifest(repository_id, manifest_interface_instance, storage):
manifest_interface_instance.digest, repository_id)
return None
# Special check: If the empty layer blob is needed for this manifest, add it to the
# blob map. This is necessary because Docker decided to elide sending of this special
# empty layer in schema version 2, but we need to have it referenced for GC and schema version 1.
if manifest_interface_instance.get_requires_empty_layer_blob(retriever):
shared_blob = get_or_create_shared_blob(EMPTY_LAYER_BLOB_DIGEST, EMPTY_LAYER_BYTES, storage)
assert not shared_blob.uploading
assert shared_blob.content_checksum == EMPTY_LAYER_BLOB_DIGEST
blob_map[EMPTY_LAYER_BLOB_DIGEST] = shared_blob
# Determine and populate the legacy image if necessary. Manifest lists will not have a legacy
# image.
legacy_image = None
@ -214,10 +225,11 @@ def _populate_legacy_image(repository_id, manifest_interface_instance, blob_map,
if parent_image is None:
return None
storage_reference = blob_map[rewritten_image.content_checksum]
synthesized = synthesize_v1_image(
repository_id,
blob_map[rewritten_image.content_checksum].id,
blob_map[rewritten_image.content_checksum].image_size,
storage_reference.id,
storage_reference.image_size,
rewritten_image.image_id,
rewritten_image.created,
rewritten_image.comment,