Handle the common case of one chunk when calculating the uncompressed size
Reference #992
This commit is contained in:
parent
1323da20e3
commit
54095eb5cb
6 changed files with 65 additions and 4 deletions
|
@ -16,6 +16,7 @@ class SizeInfo(object):
|
|||
def __init__(self):
|
||||
self.uncompressed_size = 0
|
||||
self.compressed_size = 0
|
||||
self.is_valid = True
|
||||
|
||||
def calculate_size_handler():
|
||||
""" Returns an object and a SocketReader handler. The handler will gunzip the data it receives,
|
||||
|
@ -23,17 +24,26 @@ def calculate_size_handler():
|
|||
"""
|
||||
|
||||
size_info = SizeInfo()
|
||||
|
||||
decompressor = zlib.decompressobj(ZLIB_GZIP_WINDOW)
|
||||
|
||||
def fn(buf):
|
||||
if not size_info.is_valid:
|
||||
return
|
||||
|
||||
# Note: We set a maximum CHUNK_SIZE to prevent the decompress from taking too much
|
||||
# memory. As a result, we have to loop until the unconsumed tail is empty.
|
||||
current_data = buf
|
||||
size_info.compressed_size += len(current_data)
|
||||
|
||||
while len(current_data) > 0:
|
||||
size_info.uncompressed_size += len(decompressor.decompress(current_data, CHUNK_SIZE))
|
||||
try:
|
||||
size_info.uncompressed_size += len(decompressor.decompress(current_data, CHUNK_SIZE))
|
||||
except:
|
||||
# The gzip stream is not valid for some reason.
|
||||
size_info.uncompressed_size = None
|
||||
size_info.is_valid = False
|
||||
return
|
||||
|
||||
current_data = decompressor.unconsumed_tail
|
||||
|
||||
# Make sure we allow the scheduler to do other work if we get stuck in this tight loop.
|
||||
|
|
Reference in a new issue