Check the returned ETag in Swift when streaming data

This ensures that if Swift mis-writes the data, we know immediately and can fail the write.
This commit is contained in:
Joseph Schorr 2017-06-27 16:21:37 +03:00
parent 6b272cf7e6
commit 688312bb29
2 changed files with 38 additions and 8 deletions

View file

@ -11,7 +11,7 @@ import json
from _pyio import BufferedReader from _pyio import BufferedReader
from uuid import uuid4 from uuid import uuid4
from swiftclient.client import Connection, ClientException from swiftclient.client import Connection, ClientException, ReadableToIterable
from urlparse import urlparse from urlparse import urlparse
from random import SystemRandom from random import SystemRandom
from hashlib import sha1 from hashlib import sha1
@ -108,8 +108,12 @@ class SwiftStorage(BaseStorage):
if content_encoding is not None: if content_encoding is not None:
headers['Content-Encoding'] = content_encoding headers['Content-Encoding'] = content_encoding
is_filelike = hasattr(content, 'read')
if is_filelike:
content = ReadableToIterable(content, md5=True)
try: try:
self._get_connection().put_object(self._swift_container, path, content, etag = self._get_connection().put_object(self._swift_container, path, content,
chunk_size=chunk, content_type=content_type, chunk_size=chunk, content_type=content_type,
headers=headers) headers=headers)
except ClientException: except ClientException:
@ -120,6 +124,16 @@ class SwiftStorage(BaseStorage):
logger.exception('Could not put object at path %s: %s', path, ex) logger.exception('Could not put object at path %s: %s', path, ex)
raise IOError("Could not put content: %s" % path) raise IOError("Could not put content: %s" % path)
# If we wrapped the content in a ReadableToIterable, compare its MD5 to the etag returned. If
# they don't match, raise an IOError indicating a write failure.
if is_filelike:
if etag != content.get_md5sum():
logger.error('Got mismatch in md5 etag for path %s: Expected %s, but server has %s', path,
content.get_md5sum(), etag)
raise IOError('upload verification failed for path {0}:'
'md5 mismatch, local {1} != remote {2}'
.format(path, content.get_md5sum(), etag))
def _head_object(self, path): def _head_object(self, path):
path = self._normalize_path(path) path = self._normalize_path(path)
try: try:

View file

@ -1,5 +1,6 @@
import io import io
import pytest import pytest
import hashlib
from collections import defaultdict from collections import defaultdict
from mock import MagicMock from mock import MagicMock
@ -25,24 +26,28 @@ class MockSwiftStorage(SwiftStorage):
return self._connection return self._connection
class FakeSwiftStorage(SwiftStorage):
    """SwiftStorage test double backed by an in-memory ``FakeSwift`` connection.

    Accepts the same positional and keyword arguments as ``SwiftStorage``,
    plus an optional ``fail_checksum`` keyword (default ``False``) that is
    passed through to ``FakeSwift``.
    """

    def __init__(self, *args, **kwargs):
        # Take fail_checksum out of kwargs instead of declaring it ahead of
        # *args: a leading named parameter would capture the first positional
        # argument intended for SwiftStorage.__init__.
        fail_checksum = kwargs.pop('fail_checksum', False)
        super(FakeSwiftStorage, self).__init__(*args, **kwargs)
        self._connection = FakeSwift(fail_checksum=fail_checksum)

    def _get_connection(self):
        """Return the fake connection created in ``__init__``."""
        return self._connection
class FakeSwift(object): class FakeSwift(object):
def __init__(self): def __init__(self, fail_checksum=False):
self.containers = defaultdict(dict) self.containers = defaultdict(dict)
self.fail_checksum = fail_checksum
def head_object(self, container, path): def head_object(self, container, path):
return self.containers[container].get(path) return self.containers[container].get(path)
def put_object(self, container, path, content, chunk_size=None, content_type=None, headers=None): def put_object(self, container, path, content, chunk_size=None, content_type=None, headers=None):
if not isinstance(content, str): if not isinstance(content, str):
if hasattr(content, 'read'):
content = content.read() content = content.read()
else:
content = ''.join(content)
self.containers[container][path] = { self.containers[container][path] = {
'content': content, 'content': content,
@ -51,6 +56,10 @@ class FakeSwift(object):
'headers': headers, 'headers': headers,
} }
digest = hashlib.md5()
digest.update(content)
return digest.hexdigest() if not self.fail_checksum else 'invalid'
def get_object(self, container, path, resp_chunk_size=None): def get_object(self, container, path, resp_chunk_size=None):
data = self.containers[container].get(path, {}) data = self.containers[container].get(path, {})
if 'X-Object-Manifest' in data['headers']: if 'X-Object-Manifest' in data['headers']:
@ -124,6 +133,13 @@ def test_stream_read_write():
assert swift.get_content('somepath') == 'some content here' assert swift.get_content('somepath') == 'some content here'
assert ''.join(list(swift.stream_read('somepath'))) == 'some content here' assert ''.join(list(swift.stream_read('somepath'))) == 'some content here'
def test_stream_read_write_invalid_checksum():
    """A stream write must raise IOError when the checksum does not match.

    The storage is created with ``fail_checksum=True`` so the write is
    expected to be rejected.
    """
    storage = FakeSwiftStorage(fail_checksum=True, **base_args)
    assert not storage.exists('somepath')

    payload = io.BytesIO('some content here')
    with pytest.raises(IOError):
        storage.stream_write('somepath', payload)
def test_remove(): def test_remove():
swift = FakeSwiftStorage(**base_args) swift = FakeSwiftStorage(**base_args)
assert not swift.exists('somepath') assert not swift.exists('somepath')