Have the layer PUT method calculate the uncompressed size in real time, since trusting the size reported in the uploaded JSON is fraught with complications

This commit is contained in:
Joseph Schorr 2014-09-29 17:00:47 -04:00
parent 7fd3c7d31b
commit 474add0fb1
3 changed files with 48 additions and 5 deletions

View file

@ -1251,7 +1251,7 @@ def set_image_size(docker_image_id, namespace_name, repository_name,
def set_image_metadata(docker_image_id, namespace_name, repository_name, created_date_str, comment,
command, uncompressed_size, parent=None):
command, parent=None):
with config.app_config['DB_TRANSACTION_FACTORY'](db):
query = (Image
.select(Image, ImageStorage)
@ -1272,7 +1272,6 @@ def set_image_metadata(docker_image_id, namespace_name, repository_name, created
fetched.storage.created = dateutil.parser.parse(created_date_str).replace(tzinfo=None)
fetched.storage.comment = comment
fetched.storage.command = command
fetched.storage.uncompressed_size = uncompressed_size
if parent:
fetched.ancestors = '%s%s/' % (parent.ancestors, parent.id)

View file

@ -14,6 +14,7 @@ from util.http import abort, exact_abort
from auth.permissions import (ReadRepositoryPermission,
ModifyRepositoryPermission)
from data import model
from util import gzipstream
registry = Blueprint('registry', __name__)
@ -193,14 +194,26 @@ def put_image_layer(namespace, repository, image_id):
# encoding (Gunicorn)
input_stream = request.environ['wsgi.input']
# compute checksums
csums = []
# Create a socket reader to read the input stream containing the layer data.
sr = SocketReader(input_stream)
# Add a handler that store the data in storage.
tmp, store_hndlr = store.temp_store_handler()
sr.add_handler(store_hndlr)
# Add a handler to compute the uncompressed size of the layer.
uncompressed_size_info, size_hndlr = gzipstream.calculate_size_handler()
sr.add_handler(size_hndlr)
# Add a handler which computes the checksum.
h, sum_hndlr = checksums.simple_checksum_handler(json_data)
sr.add_handler(sum_hndlr)
# Stream write the data to storage.
store.stream_write(repo_image.storage.locations, layer_path, sr)
# Append the computed checksum.
csums = []
csums.append('sha256:{0}'.format(h.hexdigest()))
try:
@ -216,6 +229,12 @@ def put_image_layer(namespace, repository, image_id):
logger.debug('put_image_layer: Error when computing tarsum '
'{0}'.format(e))
# Write the uncompressed image size, if any.
if uncompressed_size_info['size'] > 0:
profile.debug('Storing uncompressed layer size: %s' % uncompressed_size_info['size'])
repo_image.storage.uncompressed_size = uncompressed_size_info['size']
repo_image.storage.save()
if repo_image.storage.checksum is None:
# We don't have a checksum stored yet, that's fine skipping the check.
# Not removing the mark though, image is not downloadable yet.
@ -460,7 +479,7 @@ def put_image_json(namespace, repository, image_id):
profile.debug('Setting image metadata')
model.set_image_metadata(image_id, namespace, repository,
data.get('created'), data.get('comment'), command,
data.get('Size'), parent_image)
parent_image)
profile.debug('Putting json path')
store.put_content(repo_image.storage.locations, json_path, request.data)

25
util/gzipstream.py Normal file
View file

@ -0,0 +1,25 @@
"""
Defines utility methods for working with gzip streams.
"""
import zlib
# Window size for decompressing GZIP streams.
# This results in ZLIB automatically detecting the GZIP headers.
# http://stackoverflow.com/questions/3122145/zlib-error-error-3-while-decompressing-incorrect-header-check/22310760#22310760
ZLIB_GZIP_WINDOW = zlib.MAX_WBITS | 32
def calculate_size_handler():
  """ Returns an object and a SocketReader handler. The handler will gunzip the data it receives,
      adding the size found to the object.
  """
  # Mutable accumulator shared with the caller: the running total is read out
  # of this dict after the stream has been fully consumed by the handler.
  size_info = {'size': 0}

  # MAX_WBITS | 32 tells zlib to auto-detect (and skip) the gzip header, so
  # the handler can be fed the raw gzip layer stream chunk by chunk.
  gunzipper = zlib.decompressobj(zlib.MAX_WBITS | 32)

  def handle_chunk(buf):
    # Inflate this chunk and fold the decompressed byte count into the total.
    decompressed = gunzipper.decompress(buf)
    size_info['size'] += len(decompressed)

  return size_info, handle_chunk