Make sure we don't generate chunk sizes larger than 5 GB.
Amazon S3 does not allow chunks (multipart upload parts) larger than 5 GB; we currently don't handle that limit at all, which is why large uploads are failing. This change ensures that if a storage engine specifies a *maximum* chunk size, we write multiple chunks, each no larger than that size.
parent a74e94fb67
commit bfe2646a50
2 changed files with 112 additions and 8 deletions
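The splitting logic itself lives in storage/cloud.py, the other changed file, and is only exercised here through _CloudStorage._rechunk. As a rough sketch inferred purely from the test_rechunking assertions below (not the actual implementation), an oversized part appears to be bisected recursively until every piece fits under the maximum; only _PartUploadMetadata and its (path, offset, length) layout are taken from the tests, everything else is a placeholder:

```python
# Illustrative sketch only -- the real helper ships in storage/cloud.py and may differ.
# The recursive-halving behaviour is inferred from the test_rechunking expectations
# (e.g. a 100-byte part with a 40-byte cap becomes four 25-byte parts).
from collections import namedtuple

_PartUploadMetadata = namedtuple('_PartUploadMetadata', ['path', 'offset', 'length'])


def rechunk(chunk, max_chunk_size):
  """ Yield pieces of `chunk`, each no larger than `max_chunk_size`, by halving recursively. """
  if max_chunk_size is None or chunk.length <= max_chunk_size:
    yield chunk
  else:
    half = chunk.length // 2
    for piece in (_PartUploadMetadata(chunk.path, chunk.offset, half),
                  _PartUploadMetadata(chunk.path, chunk.offset + half, chunk.length - half)):
      for subchunk in rechunk(piece, max_chunk_size):
        yield subchunk

# Example: list(rechunk(_PartUploadMetadata('foo', 0, 100), 40)) yields four parts of
# length 25 each, matching the assertions in test_rechunking below.
```

Note that, going by those assertions, the cap is enforced by bisection rather than an even ceil(length / max) split, which is why a 100-byte part under a 49-byte cap yields four 25-byte pieces rather than three.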
@@ -4,6 +4,7 @@ import boto
import os

from storage import S3Storage
from storage.cloud import _CloudStorage, _PartUploadMetadata
from storage.cloud import _CHUNKS_KEY
from StringIO import StringIO
@@ -140,6 +141,88 @@ class TestCloudStorage(unittest.TestCase):
    for chunk in metadata[_CHUNKS_KEY]:
      self.assertFalse(self.engine.exists(chunk.path))
  def test_large_chunks_upload(self):
    # Make the max chunk size much smaller for testing.
    self.engine.maximum_chunk_size = self.engine.minimum_chunk_size * 2

    upload_id, metadata = self.engine.initiate_chunked_upload()

    # Write a "super large" chunk, to ensure that it is broken into smaller chunks.
    chunk_data = os.urandom(int(self.engine.maximum_chunk_size * 2.5))
    bytes_written, new_metadata, _ = self.engine.stream_upload_chunk(upload_id, 0, -1,
                                                                     StringIO(chunk_data),
                                                                     metadata)
    self.assertEquals(bytes_written, len(chunk_data))

    # Complete the chunked upload.
    self.engine.complete_chunked_upload(upload_id, 'some/chunked/path', new_metadata)

    # Ensure the file contents are valid.
    self.assertEquals(len(self.engine.get_content('some/chunked/path')), len(chunk_data))
    self.assertEquals(chunk_data, self.engine.get_content('some/chunked/path'))
  def test_large_chunks_with_ragged_edge(self):
    # Make the max chunk size much smaller for testing and force it to have a ragged edge.
    self.engine.maximum_chunk_size = self.engine.minimum_chunk_size * 2 + 10

    upload_id, metadata = self.engine.initiate_chunked_upload()

    # Write a few "super large" chunks, to ensure that they are broken into smaller chunks.
    all_data = ''
    for _ in range(0, 2):
      chunk_data = os.urandom(int(self.engine.maximum_chunk_size) + 20)
      bytes_written, new_metadata, _ = self.engine.stream_upload_chunk(upload_id, 0, -1,
                                                                       StringIO(chunk_data),
                                                                       metadata)
      self.assertEquals(bytes_written, len(chunk_data))
      all_data = all_data + chunk_data
      metadata = new_metadata

    # Complete the chunked upload.
    self.engine.complete_chunked_upload(upload_id, 'some/chunked/path', new_metadata)

    # Ensure the file contents are valid.
    self.assertEquals(len(self.engine.get_content('some/chunked/path')), len(all_data))
    self.assertEquals(all_data, self.engine.get_content('some/chunked/path'))
  def assertRechunked(self, chunk, max_size, *args):
    rechunked = list(_CloudStorage._rechunk(chunk, max_size))
    self.assertEquals(len(rechunked), len(args), rechunked)
    for index, chunk in enumerate(rechunked):
      self.assertEquals(args[index], chunk)
  def test_rechunking(self):
    chunk = _PartUploadMetadata('foo', 0, 100)

    self.assertRechunked(chunk, 50,
                         _PartUploadMetadata('foo', 0, 50),
                         _PartUploadMetadata('foo', 50, 50))

    self.assertRechunked(chunk, 40,
                         _PartUploadMetadata('foo', 0, 25),
                         _PartUploadMetadata('foo', 25, 25),
                         _PartUploadMetadata('foo', 50, 25),
                         _PartUploadMetadata('foo', 75, 25))

    self.assertRechunked(chunk, 51,
                         _PartUploadMetadata('foo', 0, 50),
                         _PartUploadMetadata('foo', 50, 50))

    self.assertRechunked(chunk, 49,
                         _PartUploadMetadata('foo', 0, 25),
                         _PartUploadMetadata('foo', 25, 25),
                         _PartUploadMetadata('foo', 50, 25),
                         _PartUploadMetadata('foo', 75, 25))

    self.assertRechunked(chunk, 99,
                         _PartUploadMetadata('foo', 0, 50),
                         _PartUploadMetadata('foo', 50, 50))

    self.assertRechunked(chunk, 100,
                         _PartUploadMetadata('foo', 0, 100))
if __name__ == '__main__':
  unittest.main()
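For context (and as an assumption, since storage/cloud.py is not part of this excerpt): the S3-backed engine would presumably advertise the 5 GB limit from the commit message via the same maximum_chunk_size attribute the tests override, roughly along these lines:

```python
# Hypothetical illustration, not taken from this diff: an S3-like engine exposes its part-size
# bounds so the upload path knows when recorded chunks must be re-split before completion.
_MAX_S3_PART_SIZE = 5 * 1024 * 1024 * 1024  # S3 rejects multipart parts larger than 5 GB


class S3LikeEngine(object):
  def __init__(self):
    self.minimum_chunk_size = 5 * 1024 * 1024  # S3 also enforces a 5 MB minimum part size
    self.maximum_chunk_size = _MAX_S3_PART_SIZE
```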