Compute the tarsum only when required. Newer versions of Docker only require the simple SHA256 checksum, so this should save us from writing to a temp file.

2014-10-20 13:11:33 -04:00 · 2014-10-20 13:11:33 -04:00 · 47be7cab7a
commit 47be7cab7a
parent 28a463f998
2 changed files with 33 additions and 14 deletions
--- a/endpoints/registry.py
+++ b/endpoints/registry.py
@@ -197,12 +197,15 @@ def put_image_layer(namespace, repository, image_id):
  # Create a socket reader to read the input stream containing the layer data.
  sr = SocketReader(input_stream)

-  # Add a handler that store the data in storage.
-  tmp, store_hndlr = store.temp_store_handler()
-  sr.add_handler(store_hndlr)
+  # Add a handler that copies the data into a temp file. This is used to calculate the tarsum,
+  # which is only needed for older versions of Docker.
+  requires_tarsum = session.get('checksum_format') == 'tarsum'
+  if requires_tarsum:
+    tmp, tmp_hndlr = store.temp_store_handler()
+    sr.add_handler(tmp_hndlr)

-  # Add a handler to compute the uncompressed size of the layer.
-  uncompressed_size_info, size_hndlr = gzipstream.calculate_size_handler()
+  # Add a handler to compute the compressed and uncompressed sizes of the layer.
+  size_info, size_hndlr = gzipstream.calculate_size_handler()
  sr.add_handler(size_hndlr)

  # Add a handler which computes the checksum.
@@ -217,14 +220,15 @@ def put_image_layer(namespace, repository, image_id):
  csums.append('sha256:{0}'.format(h.hexdigest()))

  try:
-    image_size = tmp.tell()
-
    # Save the size of the image.
-    model.set_image_size(image_id, namespace, repository, image_size, uncompressed_size_info.size)
+    model.set_image_size(image_id, namespace, repository, size_info.compressed_size,
+                         size_info.uncompressed_size)
+
+    if requires_tarsum:
+      tmp.seek(0)
+      csums.append(checksums.compute_tarsum(tmp, json_data))
+      tmp.close()

-    tmp.seek(0)
-    csums.append(checksums.compute_tarsum(tmp, json_data))
-    tmp.close()
  except (IOError, checksums.TarError) as e:
    logger.debug('put_image_layer: Error when computing tarsum '
                 '{0}'.format(e))
@@ -267,7 +271,19 @@ def put_image_checksum(namespace, repository, image_id):
  if not permission.can():
    abort(403)

-  checksum = request.headers.get('X-Docker-Checksum')
+  # Docker Version < 0.10 (tarsum+sha):
+  old_checksum = request.headers.get('X-Docker-Checksum')
+
+  # Docker Version >= 0.10 (sha):
+  new_checksum = request.headers.get('X-Docker-Checksum-Payload')
+
+  # Store whether we need to calculate the tarsum.
+  if new_checksum:
+    session['checksum_format'] = 'sha256'
+  else:
+    session['checksum_format'] = 'tarsum'
+
+  checksum = new_checksum or old_checksum
  if not checksum:
    abort(400, "Missing checksum for image %(image_id)s", issue='missing-checksum',
          image_id=image_id)
--- a/util/gzipstream.py
+++ b/util/gzipstream.py
@@ -13,7 +13,8 @@ CHUNK_SIZE = 5 * 1024 * 1024

 class SizeInfo(object):
  def __init__(self):
-    self.size = 0
+    self.uncompressed_size = 0
+    self.compressed_size = 0

 def calculate_size_handler():
  """ Returns an object and a SocketReader handler. The handler will gunzip the data it receives,
@@ -28,8 +29,10 @@ def calculate_size_handler():
    # Note: We set a maximum CHUNK_SIZE to prevent the decompress from taking too much
    # memory. As a result, we have to loop until the unconsumed tail is empty.
    current_data = buf
+    size_info.compressed_size += len(current_data)
+    
    while len(current_data) > 0:
-      size_info.size += len(decompressor.decompress(current_data, CHUNK_SIZE))
+      size_info.uncompressed_size += len(decompressor.decompress(current_data, CHUNK_SIZE))
      current_data = decompressor.unconsumed_tail

  return size_info, fn