""" Azure storage driver. Based on: https://docs.microsoft.com/en-us/azure/storage/blobs/storage-python-how-to-use-blob-storage """ import logging import os import io import uuid import copy import time from datetime import datetime, timedelta from azure.common import AzureException from azure.storage.blob import BlockBlobService, ContentSettings, BlobBlock, ContainerPermissions from azure.storage.common.models import CorsRule from storage.basestorage import BaseStorage from util.registry.filelike import LimitingStream, READ_UNTIL_END logger = logging.getLogger(__name__) _COPY_POLL_SLEEP = 0.25 # seconds _MAX_COPY_POLL_COUNT = 120 # _COPY_POLL_SLEEPs => 120s _MAX_BLOCK_SIZE = 1024 * 1024 * 100 # 100MB _BLOCKS_KEY = 'blocks' _CONTENT_TYPE_KEY = 'content-type' class AzureStorage(BaseStorage): def __init__(self, context, azure_container, storage_path, azure_account_name, azure_account_key=None, sas_token=None, connection_string=None, is_emulated=False, socket_timeout=20, request_timeout=20): super(AzureStorage, self).__init__() self._context = context self._storage_path = storage_path.lstrip('/') self._azure_account_name = azure_account_key self._azure_account_key = azure_account_key self._azure_sas_token = sas_token self._azure_container = azure_container self._azure_connection_string = connection_string self._request_timeout = request_timeout self._blob_service = BlockBlobService(account_name=azure_account_name, account_key=azure_account_key, sas_token=sas_token, is_emulated=is_emulated, connection_string=connection_string, socket_timeout=socket_timeout) def _blob_name_from_path(self, object_path): if '..' in object_path: raise Exception('Relative paths are not allowed; found %s' % object_path) return os.path.join(self._storage_path, object_path).rstrip('/') def _upload_blob_path_from_uuid(self, uuid): return self._blob_name_from_path(self._upload_blob_name_from_uuid(uuid)) def _upload_blob_name_from_uuid(self, uuid): return 'uploads/{0}'.format(uuid) def get_direct_download_url(self, object_path, request_ip=None, expires_in=60, requires_cors=False, head=False): blob_name = self._blob_name_from_path(object_path) try: sas_token = self._blob_service.generate_blob_shared_access_signature( self._azure_container, blob_name, ContainerPermissions.READ, datetime.utcnow() + timedelta(seconds=expires_in)) blob_url = self._blob_service.make_blob_url(self._azure_container, blob_name, sas_token=sas_token) except AzureException: logger.exception('Exception when trying to get direct download for path %s', object_path) raise IOError('Exception when trying to get direct download') return blob_url def validate(self, client): super(AzureStorage, self).validate(client) self._blob_service.get_container_properties(self._azure_container, timeout=self._request_timeout) def get_content(self, path): blob_name = self._blob_name_from_path(path) try: blob = self._blob_service.get_blob_to_bytes(self._azure_container, blob_name) except AzureException: logger.exception('Exception when trying to get path %s', path) raise IOError('Exception when trying to get path') return blob.content def put_content(self, path, content): blob_name = self._blob_name_from_path(path) try: self._blob_service.create_blob_from_bytes(self._azure_container, blob_name, content) except AzureException: logger.exception('Exception when trying to put path %s', path) raise IOError('Exception when trying to put path') def stream_read(self, path): with self.stream_read_file(path) as f: while True: buf = f.read(self.buffer_size) if not buf: break yield 
    def stream_read_file(self, path):
        blob_name = self._blob_name_from_path(path)

        try:
            # Note: this buffers the entire blob in memory before returning a
            # file-like object over it.
            output_stream = io.BytesIO()
            self._blob_service.get_blob_to_stream(self._azure_container, blob_name, output_stream)
            output_stream.seek(0)
        except AzureException:
            logger.exception('Exception when trying to stream_read_file path %s', path)
            raise IOError('Exception when trying to stream_read_file path')

        return output_stream

    def stream_write(self, path, fp, content_type=None, content_encoding=None):
        blob_name = self._blob_name_from_path(path)
        content_settings = ContentSettings(
            content_type=content_type,
            content_encoding=content_encoding,
        )

        try:
            self._blob_service.create_blob_from_stream(self._azure_container, blob_name, fp,
                                                       content_settings=content_settings)
        except AzureException:
            logger.exception('Exception when trying to stream_write path %s', path)
            raise IOError('Exception when trying to stream_write path')

    def exists(self, path):
        blob_name = self._blob_name_from_path(path)
        try:
            return self._blob_service.exists(self._azure_container, blob_name,
                                             timeout=self._request_timeout)
        except AzureException:
            logger.exception('Exception when trying to check exists path %s', path)
            raise IOError('Exception when trying to check exists path')

    def remove(self, path):
        blob_name = self._blob_name_from_path(path)
        try:
            self._blob_service.delete_blob(self._azure_container, blob_name)
        except AzureException:
            logger.exception('Exception when trying to remove path %s', path)
            raise IOError('Exception when trying to remove path')

    def get_checksum(self, path):
        blob_name = self._blob_name_from_path(path)
        try:
            blob = self._blob_service.get_blob_properties(self._azure_container, blob_name)
        except AzureException:
            logger.exception('Exception when trying to get_checksum for path %s', path)
            raise IOError('Exception when trying to get_checksum path')

        return blob.properties.etag

    def initiate_chunked_upload(self):
        random_uuid = str(uuid.uuid4())
        metadata = {
            _BLOCKS_KEY: [],
            _CONTENT_TYPE_KEY: None,
        }
        return random_uuid, metadata

    def stream_upload_chunk(self, uuid, offset, length, in_fp, storage_metadata,
                            content_type=None):
        if length == 0:
            return 0, storage_metadata, None

        upload_blob_path = self._upload_blob_path_from_uuid(uuid)
        new_metadata = copy.deepcopy(storage_metadata)

        total_bytes_written = 0

        while True:
            current_length = length - total_bytes_written
            max_length = (min(current_length, _MAX_BLOCK_SIZE) if length != READ_UNTIL_END
                          else _MAX_BLOCK_SIZE)
            if max_length <= 0:
                break

            limited = LimitingStream(in_fp, max_length, seekable=False)

            # Note: Azure fails if a zero-length block is uploaded, so we read all the data
            # here and, if there is none, terminate early.
            block_data = b''
            for chunk in iter(lambda: limited.read(4096), b''):
                block_data += chunk

            if len(block_data) == 0:
                break
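            # Azure requires every block ID within a blob to have the same length, so use
            # a fixed-width, zero-padded decimal index as the ID; five digits comfortably
            # cover Azure's 50,000-block-per-blob limit.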
            block_index = len(new_metadata[_BLOCKS_KEY])
            block_id = format(block_index, '05')
            new_metadata[_BLOCKS_KEY].append(block_id)

            try:
                self._blob_service.put_block(self._azure_container, upload_blob_path, block_data,
                                             block_id, validate_content=True)
            except AzureException as ae:
                logger.exception('Exception when trying to stream_upload_chunk block %s for %s',
                                 block_id, uuid)
                return total_bytes_written, new_metadata, ae

            bytes_written = len(block_data)
            total_bytes_written += bytes_written
            if bytes_written == 0 or bytes_written < max_length:
                break

        if content_type is not None:
            new_metadata[_CONTENT_TYPE_KEY] = content_type

        return total_bytes_written, new_metadata, None

    def complete_chunked_upload(self, uuid, final_path, storage_metadata):
        """ Complete the chunked upload and store the final results in the path indicated.
            Returns nothing.
        """
        # Commit the blob's blocks.
        upload_blob_path = self._upload_blob_path_from_uuid(uuid)
        block_list = [BlobBlock(block_id) for block_id in storage_metadata[_BLOCKS_KEY]]

        try:
            self._blob_service.put_block_list(self._azure_container, upload_blob_path, block_list)
        except AzureException:
            logger.exception('Exception when trying to put block list for path %s from upload %s',
                             final_path, uuid)
            raise IOError('Exception when trying to put block list')

        # Set the content type on the blob if applicable.
        if storage_metadata[_CONTENT_TYPE_KEY] is not None:
            content_settings = ContentSettings(content_type=storage_metadata[_CONTENT_TYPE_KEY])
            try:
                self._blob_service.set_blob_properties(self._azure_container, upload_blob_path,
                                                       content_settings=content_settings)
            except AzureException:
                logger.exception('Exception when trying to set blob properties for path %s',
                                 final_path)
                raise IOError('Exception when trying to set blob properties')

        # Copy the blob to its final location.
        upload_blob_name = self._upload_blob_name_from_uuid(uuid)
        copy_source_url = self.get_direct_download_url(upload_blob_name, expires_in=300)

        try:
            blob_name = self._blob_name_from_path(final_path)
            copy_prop = self._blob_service.copy_blob(self._azure_container, blob_name,
                                                     copy_source_url)
        except AzureException:
            logger.exception('Exception when trying to copy uploaded blob %s to path %s', uuid,
                             final_path)
            raise IOError('Exception when trying to copy uploaded blob')

        self._await_copy(self._azure_container, blob_name, copy_prop)

        # Delete the original blob.
        logger.debug('Deleting chunked upload %s at path %s', uuid, upload_blob_path)
        try:
            self._blob_service.delete_blob(self._azure_container, upload_blob_path)
        except AzureException:
            logger.exception('Exception when trying to delete uploaded blob %s', uuid)
            raise IOError('Exception when trying to delete uploaded blob')

    def cancel_chunked_upload(self, uuid, storage_metadata):
        """ Cancel the chunked upload and clean up any outstanding partially uploaded data.
            Returns nothing.
        """
        upload_blob_path = self._upload_blob_path_from_uuid(uuid)
        logger.debug('Canceling chunked upload %s at path %s', uuid, upload_blob_path)
        self._blob_service.delete_blob(self._azure_container, upload_blob_path)
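    # A hedged sketch of the chunked-upload flow implemented above ('storage' is an
    # AzureStorage instance and 'data_fp' a hypothetical file-like object;
    # READ_UNTIL_END asks stream_upload_chunk to drain it completely):
    #
    #   upload_id, metadata = storage.initiate_chunked_upload()
    #   written, metadata, err = storage.stream_upload_chunk(
    #       upload_id, 0, READ_UNTIL_END, data_fp, metadata)
    #   if err is None:
    #       storage.complete_chunked_upload(upload_id, 'final/path', metadata)
    #   else:
    #       storage.cancel_chunked_upload(upload_id, metadata)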
    def _await_copy(self, container, blob_name, copy_prop):
        # Poll for copy completion.
        count = 0
        while copy_prop.status == 'pending':
            props = self._blob_service.get_blob_properties(container, blob_name)
            copy_prop = props.properties.copy

            if copy_prop.status == 'success':
                return

            if copy_prop.status == 'failed' or copy_prop.status == 'aborted':
                raise IOError('Copy of blob %s failed with status %s' % (blob_name,
                                                                         copy_prop.status))

            count = count + 1
            if count > _MAX_COPY_POLL_COUNT:
                raise IOError('Timed out waiting for copy to complete')

            time.sleep(_COPY_POLL_SLEEP)

    def copy_to(self, destination, path):
        if self.__class__ == destination.__class__:
            logger.debug('Starting copying file from Azure %s to Azure %s via an Azure copy',
                         self._azure_container, destination._azure_container)

            blob_name = self._blob_name_from_path(path)
            copy_source_url = self.get_direct_download_url(path)
            copy_prop = self._blob_service.copy_blob(destination._azure_container, blob_name,
                                                     copy_source_url)
            self._await_copy(destination._azure_container, blob_name, copy_prop)

            logger.debug('Finished copying file from Azure %s to Azure %s via an Azure copy',
                         self._azure_container, destination._azure_container)
            return

        # Fallback to a slower, default copy.
        logger.debug('Copying file from Azure container %s to %s via a streamed copy',
                     self._azure_container, destination)
        with self.stream_read_file(path) as fp:
            destination.stream_write(path, fp)

    def setup(self):
        # From: https://docs.microsoft.com/en-us/rest/api/storageservices/cross-origin-resource-sharing--cors--support-for-the-azure-storage-services
        cors = [CorsRule(allowed_origins='*',
                         allowed_methods=['GET', 'PUT'],
                         max_age_in_seconds=3000,
                         exposed_headers=['x-ms-meta-*'],
                         allowed_headers=['x-ms-meta-data*', 'x-ms-meta-target*', 'x-ms-meta-abc',
                                          'Content-Type'])]

        self._blob_service.set_blob_service_properties(cors=cors)
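# A hedged sketch of a server-side copy between two Azure-backed instances ('src'
# and 'dst' are hypothetical AzureStorage objects configured for different
# containers; same-class destinations use Azure's copy_blob, others fall back to a
# streamed copy):
#
#   src.copy_to(dst, 'some/object')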