Make sure we don't generate chunk sizes larger than 5 GB.

Amazon S3 does not allow for chunk sizes larger than 5 GB; we currently don't handle that case at all, which is why large uploads are failing. This change ensures that if a storage engine specifies a *maximum* chunk size, we write multiple chunks no larger than that size.
This commit is contained in:
Joseph Schorr 2016-09-27 12:23:32 +02:00
parent a74e94fb67
commit bfe2646a50
2 changed files with 112 additions and 8 deletions

View file

@ -4,6 +4,7 @@ import boto
import os
from storage import S3Storage
from storage.cloud import _CloudStorage, _PartUploadMetadata
from storage.cloud import _CHUNKS_KEY
from StringIO import StringIO
@ -140,6 +141,88 @@ class TestCloudStorage(unittest.TestCase):
for chunk in metadata[_CHUNKS_KEY]:
self.assertFalse(self.engine.exists(chunk.path))
def test_large_chunks_upload(self):
    """Upload one oversized chunk and check the stored content round-trips.

    The engine's maximum chunk size is lowered to twice its minimum so that
    a payload of 2.5x the maximum is comfortably "too large" while the test
    data stays small. The test then asserts that every byte was reported as
    written and that the completed upload reads back byte-for-byte.
    """
    target_path = 'some/chunked/path'

    # Shrink the limit so we do not need gigabytes of data to exceed it.
    self.engine.maximum_chunk_size = self.engine.minimum_chunk_size * 2

    upload_id, metadata = self.engine.initiate_chunked_upload()

    # A single payload well above the maximum; the intent is that the engine
    # breaks it into smaller chunks on our behalf.
    payload = os.urandom(int(self.engine.maximum_chunk_size * 2.5))
    upload_result = self.engine.stream_upload_chunk(
        upload_id, 0, -1, StringIO(payload), metadata)
    bytes_written, new_metadata, _ = upload_result
    self.assertEquals(bytes_written, len(payload))

    # Finish the upload so the content becomes readable at the target path.
    self.engine.complete_chunked_upload(upload_id, target_path, new_metadata)

    # Check the length first (a short failure message), then the exact bytes.
    self.assertEquals(len(self.engine.get_content(target_path)), len(payload))
    self.assertEquals(payload, self.engine.get_content(target_path))
def test_large_chunks_with_ragged_edge(self):
    """Upload two oversized chunks against an uneven maximum chunk size.

    The maximum chunk size is set to twice the minimum plus 10 bytes (the
    "ragged edge"), and each uploaded payload is 20 bytes larger than that
    maximum. After both payloads are streamed and the upload is completed,
    the stored content must equal the two payloads concatenated in order.
    """
    target_path = 'some/chunked/path'

    # Small limit for testing, deliberately offset by 10 bytes.
    self.engine.maximum_chunk_size = self.engine.minimum_chunk_size * 2 + 10

    upload_id, metadata = self.engine.initiate_chunked_upload()

    # Stream a couple of payloads that each exceed the maximum, threading the
    # metadata returned by one call into the next.
    pieces = []
    for _ in range(2):
        chunk_data = os.urandom(int(self.engine.maximum_chunk_size) + 20)
        bytes_written, metadata, _ignored = self.engine.stream_upload_chunk(
            upload_id, 0, -1, StringIO(chunk_data), metadata)
        self.assertEquals(bytes_written, len(chunk_data))
        pieces.append(chunk_data)

    all_data = ''.join(pieces)

    # Finish the upload using the metadata from the last streamed chunk.
    self.engine.complete_chunked_upload(upload_id, target_path, metadata)

    # Check the length first (a short failure message), then the exact bytes.
    self.assertEquals(len(self.engine.get_content(target_path)), len(all_data))
    self.assertEquals(all_data, self.engine.get_content(target_path))
def assertRechunked(self, chunk, max_size, *args):
    """Assert that rechunking `chunk` with `max_size` yields exactly `args`.

    Calls `_CloudStorage._rechunk(chunk, max_size)`, materializes the result
    as a list, and compares it against the expected chunks given as the
    remaining positional arguments: first the count (the produced list is
    used as the failure message), then each element pairwise in order.
    """
    produced = list(_CloudStorage._rechunk(chunk, max_size))
    self.assertEquals(len(produced), len(args), produced)

    # Lengths are known to match at this point, so zip drops nothing.
    for expected, actual in zip(args, produced):
        self.assertEquals(expected, actual)
def test_rechunking(self):
    """Check `_rechunk` output for a 100-unit chunk at several maximum sizes.

    Each case pairs a maximum size with the expected pieces, and the cases
    are checked in order via `assertRechunked`.
    """
    chunk = _PartUploadMetadata('foo', 0, 100)

    # Each piece is the (second, third) constructor argument pair of the
    # expected _PartUploadMetadata; presumably (offset, length) -- confirm
    # against the _PartUploadMetadata definition.
    whole = [(0, 100)]
    halves = [(0, 50), (50, 50)]
    quarters = [(0, 25), (25, 25), (50, 25), (75, 25)]

    cases = [
        (50, halves),
        (40, quarters),
        (51, halves),
        (49, quarters),
        (99, halves),
        (100, whole),
    ]

    for max_size, pieces in cases:
        expected = [_PartUploadMetadata('foo', second, third)
                    for second, third in pieces]
        self.assertRechunked(chunk, max_size, *expected)
if __name__ == '__main__':
    # Run this module's test cases when it is executed directly as a script.
    unittest.main()