From 47be7cab7a242a93066f7c867b0d318a0cfe5ab5 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Mon, 20 Oct 2014 13:11:33 -0400 Subject: [PATCH] Compute the tarsum only when required. Newer versions of Docker only require the simple SHA256 checksum, so this should save us from writing to a temp file. --- endpoints/registry.py | 40 ++++++++++++++++++++++++++++------------ util/gzipstream.py | 7 +++++-- 2 files changed, 33 insertions(+), 14 deletions(-) diff --git a/endpoints/registry.py b/endpoints/registry.py index 2327ffde6..48943bf4c 100644 --- a/endpoints/registry.py +++ b/endpoints/registry.py @@ -197,12 +197,15 @@ def put_image_layer(namespace, repository, image_id): # Create a socket reader to read the input stream containing the layer data. sr = SocketReader(input_stream) - # Add a handler that store the data in storage. - tmp, store_hndlr = store.temp_store_handler() - sr.add_handler(store_hndlr) + # Add a handler that copies the data into a temp file. This is used to calculate the tarsum, + # which is only needed for older versions of Docker. + requires_tarsum = session.get('checksum_format') == 'tarsum' + if requires_tarsum: + tmp, tmp_hndlr = store.temp_store_handler() + sr.add_handler(tmp_hndlr) - # Add a handler to compute the uncompressed size of the layer. - uncompressed_size_info, size_hndlr = gzipstream.calculate_size_handler() + # Add a handler to compute the compressed and uncompressed sizes of the layer. + size_info, size_hndlr = gzipstream.calculate_size_handler() sr.add_handler(size_hndlr) # Add a handler which computes the checksum. @@ -217,14 +220,15 @@ def put_image_layer(namespace, repository, image_id): csums.append('sha256:{0}'.format(h.hexdigest())) try: - image_size = tmp.tell() - # Save the size of the image. - model.set_image_size(image_id, namespace, repository, image_size, uncompressed_size_info.size) + model.set_image_size(image_id, namespace, repository, size_info.compressed_size, + size_info.uncompressed_size) + + if requires_tarsum: + tmp.seek(0) + csums.append(checksums.compute_tarsum(tmp, json_data)) + tmp.close() - tmp.seek(0) - csums.append(checksums.compute_tarsum(tmp, json_data)) - tmp.close() except (IOError, checksums.TarError) as e: logger.debug('put_image_layer: Error when computing tarsum ' '{0}'.format(e)) @@ -267,7 +271,19 @@ def put_image_checksum(namespace, repository, image_id): if not permission.can(): abort(403) - checksum = request.headers.get('X-Docker-Checksum') + # Docker Version < 0.10 (tarsum+sha): + old_checksum = request.headers.get('X-Docker-Checksum') + + # Docker Version >= 0.10 (sha): + new_checksum = request.headers.get('X-Docker-Checksum-Payload') + + # Store whether we need to calculate the tarsum. + if new_checksum: + session['checksum_format'] = 'sha256' + else: + session['checksum_format'] = 'tarsum' + + checksum = new_checksum or old_checksum if not checksum: abort(400, "Missing checksum for image %(image_id)s", issue='missing-checksum', image_id=image_id) diff --git a/util/gzipstream.py b/util/gzipstream.py index eb4139833..739d0cd8c 100644 --- a/util/gzipstream.py +++ b/util/gzipstream.py @@ -13,7 +13,8 @@ CHUNK_SIZE = 5 * 1024 * 1024 class SizeInfo(object): def __init__(self): - self.size = 0 + self.uncompressed_size = 0 + self.compressed_size = 0 def calculate_size_handler(): """ Returns an object and a SocketReader handler. The handler will gunzip the data it receives, @@ -28,8 +29,10 @@ def calculate_size_handler(): # Note: We set a maximum CHUNK_SIZE to prevent the decompress from taking too much # memory. As a result, we have to loop until the unconsumed tail is empty. current_data = buf + size_info.compressed_size += len(current_data) + while len(current_data) > 0: - size_info.size += len(decompressor.decompress(current_data, CHUNK_SIZE)) + size_info.uncompressed_size += len(decompressor.decompress(current_data, CHUNK_SIZE)) current_data = decompressor.unconsumed_tail return size_info, fn