quay/util/registry/gzipstream.py

"""
Defines utility methods for working with gzip streams.
"""
import zlib
import time

# Window size for decompressing GZIP streams.
# This results in ZLIB automatically detecting the GZIP headers.
# http://stackoverflow.com/questions/3122145/zlib-error-error-3-while-decompressing-incorrect-header-check/22310760#22310760
ZLIB_GZIP_WINDOW = zlib.MAX_WBITS | 32

CHUNK_SIZE = 5 * 1024 * 1024
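# Illustrative note (not part of the original module): with this window value,
# zlib can inflate gzip-framed data directly, without stripping the gzip header
# first, e.g.
#
#   >>> import gzip
#   >>> zlib.decompressobj(zlib.MAX_WBITS | 32).decompress(gzip.compress(b"abc"))
#   b'abc'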

class SizeInfo(object):
    def __init__(self):
        self.uncompressed_size = 0
        self.compressed_size = 0
        self.is_valid = True


def calculate_size_handler():
    """ Returns an object and a SocketReader handler. The handler will gunzip the data it
        receives, adding the size found to the object.
    """
    size_info = SizeInfo()
    decompressor = zlib.decompressobj(ZLIB_GZIP_WINDOW)

    def fn(buf):
        if not size_info.is_valid:
            return

        # Note: We set a maximum CHUNK_SIZE to prevent the decompress from taking too much
        # memory. As a result, we have to loop until the unconsumed tail is empty.
        current_data = buf
        size_info.compressed_size += len(current_data)

        while len(current_data) > 0:
            try:
                size_info.uncompressed_size += len(decompressor.decompress(current_data, CHUNK_SIZE))
            except:
                # The gzip stream is not valid for some reason.
                size_info.uncompressed_size = None
                size_info.is_valid = False
                return

            current_data = decompressor.unconsumed_tail

            # Make sure we allow the scheduler to do other work if we get stuck in this tight loop.
            if len(current_data) > 0:
                time.sleep(0)

    return size_info, fn
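
# --- Illustrative usage sketch (not part of the original module) ---
# Shows how a caller might feed gzip-compressed chunks through the handler
# returned by calculate_size_handler() and then read the accumulated sizes.
# The sample payload, the 256-byte chunking, and the use of Python 3's
# gzip.compress() are assumptions made purely for demonstration.
if __name__ == "__main__":
    import gzip

    payload = b"hello world " * 1024
    compressed = gzip.compress(payload)

    size_info, handler = calculate_size_handler()

    # Feed the compressed stream to the handler in small chunks, the way a
    # socket reader would deliver it.
    for start in range(0, len(compressed), 256):
        handler(compressed[start:start + 256])

    print("compressed bytes seen:", size_info.compressed_size)
    print("uncompressed bytes counted:", size_info.uncompressed_size)
    print("stream valid:", size_info.is_valid)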