This repository has been archived on 2020-03-24. You can view files and clone it, but cannot push or open issues or pull requests.
quay/storage/swift.py
2016-08-02 15:01:37 -04:00

302 lines
11 KiB
Python

""" Swift storage driver.
Uses: http://docs.openstack.org/developer/swift/overview_large_objects.html
"""
import os.path
import copy
import hmac
import string
import logging
from uuid import uuid4
from swiftclient.client import Connection, ClientException
from urlparse import urlparse
from random import SystemRandom
from hashlib import sha1
from time import time
from collections import namedtuple
from util.registry import filelike
from storage.basestorage import BaseStorage
from util.registry.generatorfile import GeneratorFile
logger = logging.getLogger(__name__)
_PartUploadMetadata = namedtuple('_PartUploadMetadata', ['path', 'offset', 'length'])
_SEGMENTS_KEY = 'segments'
_SEGMENT_DIRECTORY = 'segments'
_MAXIMUM_SEGMENT_SIZE = 5000000000 # 5 GB
_DEFAULT_SWIFT_CONNECT_TIMEOUT = 5 # seconds
class SwiftStorage(BaseStorage):
def __init__(self, metric_queue, swift_container, storage_path, auth_url, swift_user,
swift_password, auth_version=None, os_options=None, ca_cert_path=None,
temp_url_key=None, simple_path_concat=False, connect_timeout=None,
retry_count=None, retry_on_ratelimit=True):
super(SwiftStorage, self).__init__()
self._swift_container = swift_container
self._storage_path = storage_path.lstrip('/')
self._simple_path_concat = simple_path_concat
self._auth_url = auth_url
self._ca_cert_path = ca_cert_path
self._swift_user = swift_user
self._swift_password = swift_password
self._temp_url_key = temp_url_key
self._connect_timeout = connect_timeout
self._retry_count = retry_count
self._retry_on_ratelimit = retry_on_ratelimit
try:
self._auth_version = int(auth_version or '2')
except ValueError:
self._auth_version = 2
self._os_options = os_options or {}
self._initialized = False
def _get_connection(self):
return Connection(
authurl=self._auth_url,
cacert=self._ca_cert_path,
user=self._swift_user,
key=self._swift_password,
auth_version=self._auth_version,
os_options=self._os_options,
retry_on_ratelimit=self._retry_on_ratelimit,
timeout=self._connect_timeout or _DEFAULT_SWIFT_CONNECT_TIMEOUT,
retries=self._retry_count or 5,
)
def _normalize_path(self, object_path):
""" No matter what inputs we get, we are going to return a path without a leading or trailing
'/'
"""
if self._simple_path_concat:
return (self._storage_path + object_path).rstrip('/')
else:
return os.path.join(self._storage_path, object_path).rstrip('/')
def _get_object(self, path, chunk_size=None):
path = self._normalize_path(path)
try:
_, obj = self._get_connection().get_object(self._swift_container, path,
resp_chunk_size=chunk_size)
return obj
except Exception as ex:
logger.exception('Could not get object at path %s: %s', path, ex)
raise IOError('Path %s not found' % path)
def _put_object(self, path, content, chunk=None, content_type=None, content_encoding=None,
headers=None):
path = self._normalize_path(path)
headers = headers or {}
if content_encoding is not None:
headers['Content-Encoding'] = content_encoding
try:
self._get_connection().put_object(self._swift_container, path, content,
chunk_size=chunk, content_type=content_type,
headers=headers)
except ClientException:
# We re-raise client exception here so that validation of config during setup can see
# the client exception messages.
raise
except Exception as ex:
logger.exception('Could not put object at path %s: %s', path, ex)
raise IOError("Could not put content: %s" % path)
def _head_object(self, path):
path = self._normalize_path(path)
try:
return self._get_connection().head_object(self._swift_container, path)
except Exception as ex:
logger.exception('Could not head object at path %s: %s', path, ex)
return None
def get_direct_download_url(self, object_path, expires_in=60, requires_cors=False, head=False):
if requires_cors:
return None
# Reference: http://docs.openstack.org/juno/config-reference/content/object-storage-tempurl.html
if not self._temp_url_key:
return None
# Retrieve the auth details for the connection.
try:
object_url_value, _ = self._get_connection().get_auth()
except ClientException:
logger.exception('Got client exception when trying to load Swift auth')
return None
object_url = urlparse(object_url_value)
scheme = object_url.scheme
path = object_url.path.rstrip('/')
hostname = object_url.netloc
object_path = self._normalize_path(object_path)
# Generate the signed HMAC body.
method = 'HEAD' if head else 'GET'
expires = int(time() + expires_in)
full_path = '%s/%s/%s' % (path, self._swift_container, object_path)
hmac_body = '%s\n%s\n%s' % (method, expires, full_path)
sig = hmac.new(self._temp_url_key.encode('utf-8'), hmac_body.encode('utf-8'), sha1).hexdigest()
surl = '{scheme}://{host}{full_path}?temp_url_sig={sig}&temp_url_expires={expires}'
return surl.format(scheme=scheme, host=hostname, full_path=full_path, sig=sig, expires=expires)
def validate(self, client):
super(SwiftStorage, self).validate()
if self._temp_url_key:
# Generate a direct download URL.
dd_url = self.get_direct_download_url('_verify')
if not dd_url:
raise Exception('Could not validate direct download URL; the token may be invalid.')
# Try to retrieve the direct download URL.
response = client.get(dd_url, timeout=2)
if response.status_code != 200:
logger.debug('Direct download failure: %s => %s with body %s', dd_url,
response.status_code, response.text)
msg = 'Direct download URL failed with status code %s. Please check your temp-url-key.'
raise Exception(msg % response.status_code)
def get_content(self, path):
return self._get_object(path)
def put_content(self, path, content):
self._put_object(path, content)
def stream_read(self, path):
for data in self._get_object(path, self.buffer_size):
yield data
def stream_read_file(self, path):
return GeneratorFile(self.stream_read(path))
def stream_write(self, path, fp, content_type=None, content_encoding=None):
self._put_object(path, fp, self.buffer_size, content_type=content_type,
content_encoding=content_encoding)
def exists(self, path):
return bool(self._head_object(path))
def remove(self, path):
path = self._normalize_path(path)
try:
self._get_connection().delete_object(self._swift_container, path)
except Exception:
raise IOError('Cannot delete path: %s' % path)
def _random_checksum(self, count):
chars = string.ascii_uppercase + string.digits
return ''.join(SystemRandom().choice(chars) for _ in range(count))
def get_checksum(self, path):
headers = self._head_object(path)
if not headers:
raise IOError('Cannot lookup path: %s' % path)
return headers.get('etag', '')[1:-1][:7] or self._random_checksum(7)
@staticmethod
def _segment_list_from_metadata(storage_metadata):
return [_PartUploadMetadata(*segment_args) for segment_args in storage_metadata[_SEGMENTS_KEY]]
def initiate_chunked_upload(self):
random_uuid = str(uuid4())
metadata = {
_SEGMENTS_KEY: [],
}
return random_uuid, metadata
def stream_upload_chunk(self, uuid, offset, length, in_fp, storage_metadata, content_type=None):
if length == 0:
return 0, storage_metadata, None
# Note: Swift limits segments to a maximum of 5GB, so we keep writing segments until we
# are finished hitting the data limit.
total_bytes_written = 0
upload_error = None
while True:
try:
bytes_written, storage_metadata = self._stream_upload_segment(uuid, offset, length, in_fp,
storage_metadata,
content_type)
except IOError as ex:
message = ('Error writing to stream in stream_upload_chunk for uuid %s (offset %s' +
', length %s, metadata: %s): %s')
logger.exception(message, uuid, offset, length, storage_metadata, ex)
upload_error = ex
break
if length != filelike.READ_UNTIL_END:
length = length - bytes_written
offset = offset + bytes_written
total_bytes_written = total_bytes_written + bytes_written
if bytes_written == 0 or length <= 0:
return total_bytes_written, storage_metadata, upload_error
return total_bytes_written, storage_metadata, upload_error
def _stream_upload_segment(self, uuid, offset, length, in_fp, storage_metadata, content_type):
updated_metadata = copy.deepcopy(storage_metadata)
segment_count = len(updated_metadata[_SEGMENTS_KEY])
segment_path = '%s/%s/%s' % (_SEGMENT_DIRECTORY, uuid, segment_count)
# Track the number of bytes read and if an explicit length is specified, limit the
# file stream to that length.
if length == filelike.READ_UNTIL_END:
length = _MAXIMUM_SEGMENT_SIZE
else:
length = min(_MAXIMUM_SEGMENT_SIZE, length)
limiting_fp = filelike.LimitingStream(in_fp, length)
# Write the segment to Swift.
self.stream_write(segment_path, limiting_fp, content_type)
# We are only going to track keys to which data was confirmed written.
bytes_written = limiting_fp.tell()
if bytes_written > 0:
updated_metadata[_SEGMENTS_KEY].append(_PartUploadMetadata(segment_path, offset,
bytes_written))
return bytes_written, updated_metadata
def complete_chunked_upload(self, uuid, final_path, storage_metadata):
""" Complete the chunked upload and store the final results in the path indicated.
Returns nothing.
"""
# Finally, we write an empty file at the proper location with a X-Object-Manifest
# header pointing to the prefix for the segments.
segments_prefix_path = self._normalize_path('%s/%s' % (_SEGMENT_DIRECTORY, uuid))
contained_segments_prefix_path = '%s/%s' % (self._swift_container, segments_prefix_path)
self._put_object(final_path, '', headers={'X-Object-Manifest': contained_segments_prefix_path})
def cancel_chunked_upload(self, uuid, storage_metadata):
""" Cancel the chunked upload and clean up any outstanding partially uploaded data.
Returns nothing.
"""
# Delete all the uploaded segments.
for segment in SwiftStorage._segment_list_from_metadata(storage_metadata):
self.remove(segment.path)