Handle the common case of one chunk when calculating the uncompressed size

Reference #992
2015-11-30 14:25:01 -05:00 · 2015-11-30 14:25:01 -05:00 · 54095eb5cb
commit 54095eb5cb
parent 1323da20e3
6 changed files with 65 additions and 4 deletions
--- a/endpoints/v2/blob.py
+++ b/endpoints/v2/blob.py
@@ -13,6 +13,7 @@ from auth.jwt_auth import process_jwt_auth
 from endpoints.decorators import anon_protect
 from util.cache import cache_control
 from util.registry.filelike import wrap_with_handler, StreamSlice
+from util.registry.gzipstream import calculate_size_handler
 from storage.basestorage import InvalidChunkException


@@ -220,6 +221,14 @@ def _upload_chunk(namespace, repo_name, upload_uuid):

    input_fp = wrap_with_handler(input_fp, found.sha_state.update)

+    # If this is the first chunk and we're starting at the 0 offset, add a handler to gunzip the
+    # stream so we can determine the uncompressed size. We'll throw out this data if another chunk
+    # comes in, but in the common case Docker only sends one chunk.
+    size_info = None
+    if start_offset == 0 and found.chunk_count == 0:
+      size_info, fn = calculate_size_handler()
+      input_fp = wrap_with_handler(input_fp, fn)
+
    try:
      length_written, new_metadata, error = storage.stream_upload_chunk(location_set, upload_uuid,
                                                                        start_offset, length, input_fp,
@@ -228,8 +237,18 @@ def _upload_chunk(namespace, repo_name, upload_uuid):
    except InvalidChunkException:
      _range_not_satisfiable(found.byte_count)

+  # If we determined an uncompressed size and this is the first chunk, add it to the blob.
+  # Otherwise, we clear the size from the blob as it was uploaded in multiple chunks.
+  if size_info is not None and found.chunk_count == 0 and size_info.is_valid:
+    found.uncompressed_byte_count = size_info.uncompressed_size
+  elif length_written > 0:
+    # Otherwise, if we wrote some bytes and the above conditions were not met, then we don't
+    # know the uncompressed size.
+    found.uncompressed_byte_count = None
+
  found.storage_metadata = new_metadata
  found.byte_count += length_written
+  found.chunk_count += 1
  return found, error


@@ -257,7 +276,8 @@ def _finish_upload(namespace, repo_name, upload_obj, expected_digest):
  # Mark the blob as uploaded.
  model.blob.store_blob_record_and_temp_link(namespace, repo_name, expected_digest,
                                             upload_obj.location, upload_obj.byte_count,
-                                             app.config['PUSH_TEMP_TAG_EXPIRATION_SEC'])
+                                             app.config['PUSH_TEMP_TAG_EXPIRATION_SEC'],
+                                             upload_obj.uncompressed_byte_count)

  # Delete the upload tracking row.
  upload_obj.delete_instance()