From 47be7cab7a242a93066f7c867b0d318a0cfe5ab5 Mon Sep 17 00:00:00 2001
From: Joseph Schorr <joseph.schorr@coreos.com>
Date: Mon, 20 Oct 2014 13:11:33 -0400
Subject: [PATCH] Compute the tarsum only when required. Newer versions of
 Docker only require the simple SHA256 checksum, so this should save us from
 writing to a temp file.

---
 endpoints/registry.py | 40 ++++++++++++++++++++++++++++------------
 util/gzipstream.py    |  7 +++++--
 2 files changed, 33 insertions(+), 14 deletions(-)

diff --git a/endpoints/registry.py b/endpoints/registry.py
index 2327ffde6..48943bf4c 100644
--- a/endpoints/registry.py
+++ b/endpoints/registry.py
@@ -197,12 +197,15 @@ def put_image_layer(namespace, repository, image_id):
   # Create a socket reader to read the input stream containing the layer data.
   sr = SocketReader(input_stream)
 
-  # Add a handler that store the data in storage.
-  tmp, store_hndlr = store.temp_store_handler()
-  sr.add_handler(store_hndlr)
+  # Add a handler that copies the data into a temp file. This is used to calculate the tarsum,
+  # which is only needed for older versions of Docker.
+  requires_tarsum = session.get('checksum_format') == 'tarsum'
+  if requires_tarsum:
+    tmp, tmp_hndlr = store.temp_store_handler()
+    sr.add_handler(tmp_hndlr)
 
-  # Add a handler to compute the uncompressed size of the layer.
-  uncompressed_size_info, size_hndlr = gzipstream.calculate_size_handler()
+  # Add a handler to compute the compressed and uncompressed sizes of the layer.
+  size_info, size_hndlr = gzipstream.calculate_size_handler()
   sr.add_handler(size_hndlr)
 
   # Add a handler which computes the checksum.
@@ -217,14 +220,15 @@ def put_image_layer(namespace, repository, image_id):
   csums.append('sha256:{0}'.format(h.hexdigest()))
 
   try:
-    image_size = tmp.tell()
-
     # Save the size of the image.
-    model.set_image_size(image_id, namespace, repository, image_size, uncompressed_size_info.size)
+    model.set_image_size(image_id, namespace, repository, size_info.compressed_size,
+                         size_info.uncompressed_size)
+
+    if requires_tarsum:
+      tmp.seek(0)
+      csums.append(checksums.compute_tarsum(tmp, json_data))
+      tmp.close()
 
-    tmp.seek(0)
-    csums.append(checksums.compute_tarsum(tmp, json_data))
-    tmp.close()
   except (IOError, checksums.TarError) as e:
     logger.debug('put_image_layer: Error when computing tarsum '
                  '{0}'.format(e))
@@ -267,7 +271,19 @@ def put_image_checksum(namespace, repository, image_id):
   if not permission.can():
     abort(403)
 
-  checksum = request.headers.get('X-Docker-Checksum')
+  # Docker Version < 0.10 (tarsum+sha):
+  old_checksum = request.headers.get('X-Docker-Checksum')
+
+  # Docker Version >= 0.10 (sha):
+  new_checksum = request.headers.get('X-Docker-Checksum-Payload')
+
+  # Record the checksum format in the session; put_image_layer reads it to decide on the tarsum.
+  if new_checksum:
+    session['checksum_format'] = 'sha256'
+  else:
+    session['checksum_format'] = 'tarsum'
+
+  checksum = new_checksum or old_checksum
   if not checksum:
     abort(400, "Missing checksum for image %(image_id)s", issue='missing-checksum',
           image_id=image_id)
diff --git a/util/gzipstream.py b/util/gzipstream.py
index eb4139833..739d0cd8c 100644
--- a/util/gzipstream.py
+++ b/util/gzipstream.py
@@ -13,7 +13,8 @@ CHUNK_SIZE = 5 * 1024 * 1024
 
 class SizeInfo(object):
   def __init__(self):
-    self.size = 0
+    self.uncompressed_size = 0
+    self.compressed_size = 0
 
 def calculate_size_handler():
   """ Returns an object and a SocketReader handler. The handler will gunzip the data it receives,
@@ -28,8 +29,10 @@ def calculate_size_handler():
     # Note: We set a maximum CHUNK_SIZE to prevent the decompress from taking too much
     # memory. As a result, we have to loop until the unconsumed tail is empty.
     current_data = buf
+    size_info.compressed_size += len(current_data)
+    
     while len(current_data) > 0:
-      size_info.size += len(decompressor.decompress(current_data, CHUNK_SIZE))
+      size_info.uncompressed_size += len(decompressor.decompress(current_data, CHUNK_SIZE))
       current_data = decompressor.unconsumed_tail
 
   return size_info, fn