diff --git a/data/model/legacy.py b/data/model/legacy.py
index 07c8b9e2a..cbdb9a5a5 100644
--- a/data/model/legacy.py
+++ b/data/model/legacy.py
@@ -1251,7 +1251,7 @@ def set_image_size(docker_image_id, namespace_name, repository_name,
 
 
 def set_image_metadata(docker_image_id, namespace_name, repository_name, created_date_str, comment,
-                       command, uncompressed_size, parent=None):
+                       command, parent=None):
   with config.app_config['DB_TRANSACTION_FACTORY'](db):
     query = (Image
              .select(Image, ImageStorage)
@@ -1272,7 +1272,6 @@ def set_image_metadata(docker_image_id, namespace_name, repository_name, created
     fetched.storage.created = dateutil.parser.parse(created_date_str).replace(tzinfo=None)
     fetched.storage.comment = comment
     fetched.storage.command = command
-    fetched.storage.uncompressed_size = uncompressed_size
 
     if parent:
       fetched.ancestors = '%s%s/' % (parent.ancestors, parent.id)
diff --git a/endpoints/registry.py b/endpoints/registry.py
index b6978f44d..14bd88ce0 100644
--- a/endpoints/registry.py
+++ b/endpoints/registry.py
@@ -14,6 +14,7 @@ from util.http import abort, exact_abort
 from auth.permissions import (ReadRepositoryPermission,
                               ModifyRepositoryPermission)
 from data import model
+from util import gzipstream
 
 
 registry = Blueprint('registry', __name__)
@@ -193,14 +194,26 @@ def put_image_layer(namespace, repository, image_id):
   # encoding (Gunicorn)
   input_stream = request.environ['wsgi.input']
 
-  # compute checksums
-  csums = []
+  # Create a socket reader to read the input stream containing the layer data.
   sr = SocketReader(input_stream)
+
+  # Add a handler that stores the data in storage.
   tmp, store_hndlr = store.temp_store_handler()
   sr.add_handler(store_hndlr)
+
+  # Add a handler to compute the uncompressed size of the layer.
+  uncompressed_size_info, size_hndlr = gzipstream.calculate_size_handler()
+  sr.add_handler(size_hndlr)
+
+  # Add a handler which computes the checksum.
   h, sum_hndlr = checksums.simple_checksum_handler(json_data)
   sr.add_handler(sum_hndlr)
+
+  # Stream write the data to storage.
   store.stream_write(repo_image.storage.locations, layer_path, sr)
+
+  # Append the computed checksum.
+  csums = []
   csums.append('sha256:{0}'.format(h.hexdigest()))
 
   try:
@@ -216,6 +229,12 @@ def put_image_layer(namespace, repository, image_id):
     logger.debug('put_image_layer: Error when computing tarsum '
                  '{0}'.format(e))
 
+  # Write the uncompressed image size, if any.
+  if uncompressed_size_info['size'] > 0:
+    profile.debug('Storing uncompressed layer size: %s' % uncompressed_size_info['size'])
+    repo_image.storage.uncompressed_size = uncompressed_size_info['size']
+    repo_image.storage.save()
+
   if repo_image.storage.checksum is None:
     # We don't have a checksum stored yet, that's fine skipping the check.
     # Not removing the mark though, image is not downloadable yet.
@@ -460,7 +479,7 @@ def put_image_json(namespace, repository, image_id):
   profile.debug('Setting image metadata')
   model.set_image_metadata(image_id, namespace, repository, data.get('created'),
                            data.get('comment'), command,
-                           data.get('Size'), parent_image)
+                           parent_image)
 
   profile.debug('Putting json path')
   store.put_content(repo_image.storage.locations, json_path, request.data)
diff --git a/util/gzipstream.py b/util/gzipstream.py
new file mode 100644
index 000000000..7dbb2fc3e
--- /dev/null
+++ b/util/gzipstream.py
@@ -0,0 +1,25 @@
+"""
+Defines utility methods for working with gzip streams.
+"""
+
+import zlib
+
+# Window size for decompressing GZIP streams.
+# This results in ZLIB automatically detecting the GZIP headers.
+# http://stackoverflow.com/questions/3122145/zlib-error-error-3-while-decompressing-incorrect-header-check/22310760#22310760
+ZLIB_GZIP_WINDOW = zlib.MAX_WBITS | 32
+
+def calculate_size_handler():
+  """ Returns an object and a SocketReader handler. The handler will gunzip the data it receives,
+      adding the size found to the object.
+  """
+  uncompressed_size_info = {
+    'size': 0
+  }
+
+  decompressor = zlib.decompressobj(ZLIB_GZIP_WINDOW)
+
+  def fn(buf):
+    uncompressed_size_info['size'] += len(decompressor.decompress(buf))
+
+  return uncompressed_size_info, fn
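For review purposes, here is a minimal sketch of the new handler in isolation. It assumes it is run from the repo root after this change (so `util.gzipstream` is importable) and stands in for the `SocketReader` plumbing in `put_image_layer` by pushing gzipped chunks at the handler directly; the payload and chunk size below are illustrative only.

```python
# Minimal sketch (not part of the change): exercises calculate_size_handler()
# outside of SocketReader. Assumes it is run from the repo root so that
# util.gzipstream is importable; payload and chunk size are made up.
import gzip
import io

from util.gzipstream import calculate_size_handler

# Build a small gzipped "layer" in memory.
payload = b'x' * 100000
buf = io.BytesIO()
with gzip.GzipFile(fileobj=buf, mode='wb') as zipped:
  zipped.write(payload)
compressed = buf.getvalue()

# Same wiring as put_image_layer, minus sr.add_handler(...).
uncompressed_size_info, size_hndlr = calculate_size_handler()

# Feed the compressed stream in arbitrary chunks, as SocketReader would.
CHUNK_SIZE = 4096
for start in range(0, len(compressed), CHUNK_SIZE):
  size_hndlr(compressed[start:start + CHUNK_SIZE])

assert uncompressed_size_info['size'] == len(payload)
```

Because the handler only counts the bytes zlib emits per chunk, the uncompressed size is obtained while the layer streams to storage, without buffering the decompressed data anywhere.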