Handle the common case of one chunk when calculating the uncompressed size

Reference #992
This commit is contained in:
Joseph Schorr 2015-11-30 14:25:01 -05:00
parent 1323da20e3
commit 54095eb5cb
6 changed files with 65 additions and 4 deletions

View file

@@ -16,6 +16,7 @@ class SizeInfo(object):
def __init__(self):
self.uncompressed_size = 0
self.compressed_size = 0
self.is_valid = True
def calculate_size_handler():
""" Returns an object and a SocketReader handler. The handler will gunzip the data it receives,
@@ -23,17 +24,26 @@ def calculate_size_handler():
"""
size_info = SizeInfo()
decompressor = zlib.decompressobj(ZLIB_GZIP_WINDOW)
def fn(buf):
if not size_info.is_valid:
return
# Note: We set a maximum CHUNK_SIZE to prevent the decompress from taking too much
# memory. As a result, we have to loop until the unconsumed tail is empty.
current_data = buf
size_info.compressed_size += len(current_data)
while len(current_data) > 0:
size_info.uncompressed_size += len(decompressor.decompress(current_data, CHUNK_SIZE))
try:
size_info.uncompressed_size += len(decompressor.decompress(current_data, CHUNK_SIZE))
except:
# The gzip stream is not valid for some reason.
size_info.uncompressed_size = None
size_info.is_valid = False
return
current_data = decompressor.unconsumed_tail
# Make sure we allow the scheduler to do other work if we get stuck in this tight loop.