Fix stream_write to properly raise an exception on failure, instead of just silently failing
This was causing problems for customers using georeplication over unstable storage engines Also adds tests for stream_write and copy, to ensure we detect failure
This commit is contained in:
		
							parent
							
								
									a048ff3633
								
							
						
					
					
						commit
						3a0adfcb11
					
				
					 3 changed files with 59 additions and 18 deletions
				
			
		|  | @ -84,7 +84,7 @@ class BaseStorage(StoragePaths): | ||||||
| 
 | 
 | ||||||
|   def stream_write_to_fp(self, in_fp, out_fp, num_bytes=READ_UNTIL_END): |   def stream_write_to_fp(self, in_fp, out_fp, num_bytes=READ_UNTIL_END): | ||||||
|     """ Copy the specified number of bytes from the input file stream to the output stream. If |     """ Copy the specified number of bytes from the input file stream to the output stream. If | ||||||
|         num_bytes < 0 copy until the stream ends. |         num_bytes < 0 copy until the stream ends. Returns the number of bytes copied. | ||||||
|     """ |     """ | ||||||
|     bytes_copied = 0 |     bytes_copied = 0 | ||||||
|     while bytes_copied < num_bytes or num_bytes == READ_UNTIL_END: |     while bytes_copied < num_bytes or num_bytes == READ_UNTIL_END: | ||||||
|  |  | ||||||
|  | @ -182,12 +182,28 @@ class _CloudStorage(BaseStorageV2): | ||||||
|                                                         **self._upload_params) |                                                         **self._upload_params) | ||||||
| 
 | 
 | ||||||
|   def stream_write(self, path, fp, content_type=None, content_encoding=None): |   def stream_write(self, path, fp, content_type=None, content_encoding=None): | ||||||
|     self._stream_write_internal(path, fp, content_type, content_encoding) |     """ Writes the data found in the file-like stream tot he given path. Raises an IOError | ||||||
|  |         if the write fails. | ||||||
|  |     """ | ||||||
|  |     _, write_error = self._stream_write_internal(path, fp, content_type, content_encoding) | ||||||
|  |     if write_error is not None: | ||||||
|  |       logger.error('Error when trying to stream_write path `%s`: %s', path, write_error) | ||||||
|  |       raise IOError('Exception when trying to stream_write path') | ||||||
| 
 | 
 | ||||||
|   def _stream_write_internal(self, path, fp, content_type=None, content_encoding=None, |   def _stream_write_internal(self, path, fp, content_type=None, content_encoding=None, | ||||||
|                              cancel_on_error=True, size=filelike.READ_UNTIL_END): |                              cancel_on_error=True, size=filelike.READ_UNTIL_END): | ||||||
|  |     """ Writes the data found in the file-like stream to the given path, with optional limit | ||||||
|  |         on size. Note that this method returns a *tuple* of (bytes_written, write_error) and should | ||||||
|  |         *not* raise an exception (such as IOError) if a problem uploading occurred. ALWAYS check | ||||||
|  |         the returned tuple on calls to this method. | ||||||
|  |     """ | ||||||
|     write_error = None |     write_error = None | ||||||
|  | 
 | ||||||
|  |     try: | ||||||
|       mp = self.__initiate_multipart_upload(path, content_type, content_encoding) |       mp = self.__initiate_multipart_upload(path, content_type, content_encoding) | ||||||
|  |     except S3ResponseError as e: | ||||||
|  |       logger.exception('Exception when initiating multipart upload') | ||||||
|  |       return 0, e | ||||||
| 
 | 
 | ||||||
|     # We are going to reuse this but be VERY careful to only read the number of bytes written to it |     # We are going to reuse this but be VERY careful to only read the number of bytes written to it | ||||||
|     buf = StringIO.StringIO() |     buf = StringIO.StringIO() | ||||||
|  | @ -211,7 +227,7 @@ class _CloudStorage(BaseStorageV2): | ||||||
|         mp.upload_part_from_file(buf, num_part, size=bytes_staged) |         mp.upload_part_from_file(buf, num_part, size=bytes_staged) | ||||||
|         total_bytes_written += bytes_staged |         total_bytes_written += bytes_staged | ||||||
|         num_part += 1 |         num_part += 1 | ||||||
|       except IOError as e: |       except (S3ResponseError, IOError) as e: | ||||||
|         logger.warn('Error when writing to stream in stream_write_internal at path %s: %s', path, e) |         logger.warn('Error when writing to stream in stream_write_internal at path %s: %s', path, e) | ||||||
|         write_error = e |         write_error = e | ||||||
| 
 | 
 | ||||||
|  | @ -219,7 +235,11 @@ class _CloudStorage(BaseStorageV2): | ||||||
|           self._context.metric_queue.multipart_upload_end.Inc(labelvalues=['failure']) |           self._context.metric_queue.multipart_upload_end.Inc(labelvalues=['failure']) | ||||||
| 
 | 
 | ||||||
|         if cancel_on_error: |         if cancel_on_error: | ||||||
|  |           try: | ||||||
|             mp.cancel_upload() |             mp.cancel_upload() | ||||||
|  |           except (S3ResponseError, IOError): | ||||||
|  |             logger.exception('Could not cancel upload') | ||||||
|  | 
 | ||||||
|           return 0, write_error |           return 0, write_error | ||||||
|         else: |         else: | ||||||
|           break |           break | ||||||
|  | @ -263,6 +283,7 @@ class _CloudStorage(BaseStorageV2): | ||||||
|     return k.etag[1:-1][:7] |     return k.etag[1:-1][:7] | ||||||
| 
 | 
 | ||||||
|   def copy_to(self, destination, path): |   def copy_to(self, destination, path): | ||||||
|  |     """ Copies the given path from this storage to the destination storage. """ | ||||||
|     self._initialize_cloud_conn() |     self._initialize_cloud_conn() | ||||||
| 
 | 
 | ||||||
|     # First try to copy directly via boto, but only if the storages are the |     # First try to copy directly via boto, but only if the storages are the | ||||||
|  | @ -527,6 +548,11 @@ class GoogleCloudStorage(_CloudStorage): | ||||||
| 
 | 
 | ||||||
|   def _stream_write_internal(self, path, fp, content_type=None, content_encoding=None, |   def _stream_write_internal(self, path, fp, content_type=None, content_encoding=None, | ||||||
|                              cancel_on_error=True, size=filelike.READ_UNTIL_END): |                              cancel_on_error=True, size=filelike.READ_UNTIL_END): | ||||||
|  |     """ Writes the data found in the file-like stream to the given path, with optional limit | ||||||
|  |         on size. Note that this method returns a *tuple* of (bytes_written, write_error) and should | ||||||
|  |         *not* raise an exception (such as IOError) if a problem uploading occurred. ALWAYS check | ||||||
|  |         the returned tuple on calls to this method. | ||||||
|  |     """ | ||||||
|     # Minimum size of upload part size on S3 is 5MB |     # Minimum size of upload part size on S3 is 5MB | ||||||
|     self._initialize_cloud_conn() |     self._initialize_cloud_conn() | ||||||
|     path = self._init_path(path) |     path = self._init_path(path) | ||||||
|  |  | ||||||
|  | @ -7,6 +7,8 @@ import pytest | ||||||
| import moto | import moto | ||||||
| import boto | import boto | ||||||
| 
 | 
 | ||||||
|  | from moto import mock_s3 | ||||||
|  | 
 | ||||||
| from storage import S3Storage, StorageContext | from storage import S3Storage, StorageContext | ||||||
| from storage.cloud import _CloudStorage, _PartUploadMetadata | from storage.cloud import _CloudStorage, _PartUploadMetadata | ||||||
| from storage.cloud import _CHUNKS_KEY | from storage.cloud import _CHUNKS_KEY | ||||||
|  | @ -56,28 +58,29 @@ def test_basicop(storage_engine): | ||||||
|   assert not storage_engine.exists(_TEST_PATH) |   assert not storage_engine.exists(_TEST_PATH) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def test_copy_samecreds(storage_engine): | @pytest.mark.parametrize('bucket, username, password', [ | ||||||
|  |   pytest.param(_TEST_BUCKET, _TEST_USER, _TEST_PASSWORD, id='same credentials'), | ||||||
|  |   pytest.param('another_bucket', 'blech', 'password', id='different credentials'), | ||||||
|  | ]) | ||||||
|  | def test_copy(bucket, username, password, storage_engine): | ||||||
|   # Copy the content to another engine. |   # Copy the content to another engine. | ||||||
|   another_engine = S3Storage(_TEST_CONTEXT, 'another/path', _TEST_BUCKET, _TEST_USER, |   another_engine = S3Storage(_TEST_CONTEXT, 'another/path', _TEST_BUCKET, _TEST_USER, | ||||||
|                              _TEST_PASSWORD) |                              _TEST_PASSWORD) | ||||||
|   storage_engine.copy_to(another_engine, _TEST_PATH) |  | ||||||
| 
 |  | ||||||
|   # Verify it can be retrieved. |  | ||||||
|   assert another_engine.get_content(_TEST_PATH) == _TEST_CONTENT |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| def test_copy_differentcreds(storage_engine): |  | ||||||
|   # Copy the content to another engine. |  | ||||||
|   another_engine = S3Storage(_TEST_CONTEXT, 'another/path', 'another_bucket', 'blech', |  | ||||||
|                              'password') |  | ||||||
|   boto.connect_s3().create_bucket('another_bucket') |   boto.connect_s3().create_bucket('another_bucket') | ||||||
| 
 |  | ||||||
|   storage_engine.copy_to(another_engine, _TEST_PATH) |   storage_engine.copy_to(another_engine, _TEST_PATH) | ||||||
| 
 | 
 | ||||||
|   # Verify it can be retrieved. |   # Verify it can be retrieved. | ||||||
|   assert another_engine.get_content(_TEST_PATH) == _TEST_CONTENT |   assert another_engine.get_content(_TEST_PATH) == _TEST_CONTENT | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | def test_copy_with_error(storage_engine): | ||||||
|  |   another_engine = S3Storage(_TEST_CONTEXT, 'another/path', 'anotherbucket', 'foo', | ||||||
|  |                              'bar') | ||||||
|  | 
 | ||||||
|  |   with pytest.raises(IOError): | ||||||
|  |     storage_engine.copy_to(another_engine, _TEST_PATH) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| def test_stream_read(storage_engine): | def test_stream_read(storage_engine): | ||||||
|   # Read the streaming content. |   # Read the streaming content. | ||||||
|   data = ''.join(storage_engine.stream_read(_TEST_PATH)) |   data = ''.join(storage_engine.stream_read(_TEST_PATH)) | ||||||
|  | @ -95,6 +98,18 @@ def test_stream_write(storage_engine): | ||||||
|   assert storage_engine.get_content(_TEST_PATH) == new_data |   assert storage_engine.get_content(_TEST_PATH) == new_data | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | def test_stream_write_error(): | ||||||
|  |   with mock_s3(): | ||||||
|  |     # Create an engine but not the bucket. | ||||||
|  |     engine = S3Storage(_TEST_CONTEXT, 'some/path', _TEST_BUCKET, _TEST_USER, _TEST_PASSWORD) | ||||||
|  | 
 | ||||||
|  |     # Attempt to write to the uncreated bucket, which should raise an error. | ||||||
|  |     with pytest.raises(IOError): | ||||||
|  |       engine.stream_write(_TEST_PATH, StringIO('hello world'), content_type='Cool/Type') | ||||||
|  | 
 | ||||||
|  |     assert not engine.exists(_TEST_PATH) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| @pytest.mark.parametrize('chunk_count', [ | @pytest.mark.parametrize('chunk_count', [ | ||||||
|   0, |   0, | ||||||
|   1, |   1, | ||||||
|  |  | ||||||
		Reference in a new issue