# quay/storage/cloud.py
import cStringIO as StringIO
import os
import logging

import boto.s3.connection
import boto.s3.multipart
import boto.gs.connection
import boto.s3.key
import boto.gs.key

from io import BufferedIOBase
from uuid import uuid4

from storage.basestorage import BaseStorageV2, InvalidChunkException


logger = logging.getLogger(__name__)


_MULTIPART_UPLOAD_ID_KEY = 'upload_id'
_LAST_PART_KEY = 'last_part_num'
_LAST_CHUNK_ENCOUNTERED = 'last_chunk_encountered'


class StreamReadKeyAsFile(BufferedIOBase):
  """Wraps a boto key in a read-only, file-like object."""
  def __init__(self, key):
    self._key = key

  def read(self, amt=None):
    if self.closed:
      return None

    resp = self._key.read(amt)
    return resp

  def readable(self):
    return True

  @property
  def closed(self):
    return self._key.closed

  def close(self):
    self._key.close(fast=True)
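
# A rough usage sketch for StreamReadKeyAsFile (the bucket and key path below are
# illustrative placeholders; in practice instances are created by stream_read_file):
#
#   key = boto.s3.key.Key(bucket, 'some/path')
#   wrapped = StreamReadKeyAsFile(key)
#   first_chunk = wrapped.read(4096)
#   wrapped.close()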


class _CloudStorage(BaseStorageV2):
  """Base implementation shared by the boto-backed storage engines (S3, GCS, RadosGW)."""
  def __init__(self, connection_class, key_class, connect_kwargs, upload_params, storage_path,
               access_key, secret_key, bucket_name):
    super(_CloudStorage, self).__init__()

    self.upload_chunk_size = 5 * 1024 * 1024

    self._initialized = False
    self._bucket_name = bucket_name
    self._access_key = access_key
    self._secret_key = secret_key
    self._root_path = storage_path
    self._connection_class = connection_class
    self._key_class = key_class
    self._upload_params = upload_params
    self._connect_kwargs = connect_kwargs
    self._cloud_conn = None
    self._cloud_bucket = None

  def _initialize_cloud_conn(self):
    if not self._initialized:
      self._cloud_conn = self._connection_class(self._access_key, self._secret_key,
                                                **self._connect_kwargs)
      self._cloud_bucket = self._cloud_conn.get_bucket(self._bucket_name)
      self._initialized = True

  def _debug_key(self, key):
    """Used for debugging only."""
    orig_meth = key.bucket.connection.make_request

    def new_meth(*args, **kwargs):
      print '#' * 16
      print args
      print kwargs
      print '#' * 16
      return orig_meth(*args, **kwargs)
    key.bucket.connection.make_request = new_meth

  def _init_path(self, path=None):
    path = os.path.join(self._root_path, path) if path else self._root_path
    if path and path[0] == '/':
      return path[1:]
    return path

  def get_cloud_conn(self):
    self._initialize_cloud_conn()
    return self._cloud_conn

  def get_cloud_bucket(self):
    return self._cloud_bucket

  def get_content(self, path):
    self._initialize_cloud_conn()
    path = self._init_path(path)
    key = self._key_class(self._cloud_bucket, path)
    if not key.exists():
      raise IOError('No such key: \'{0}\''.format(path))
    return key.get_contents_as_string()

  def put_content(self, path, content):
    self._initialize_cloud_conn()
    path = self._init_path(path)
    key = self._key_class(self._cloud_bucket, path)
    key.set_contents_from_string(content, **self._upload_params)
    return path

  def get_supports_resumable_downloads(self):
    return True

  def get_direct_download_url(self, path, expires_in=60, requires_cors=False):
    self._initialize_cloud_conn()
    path = self._init_path(path)
    k = self._key_class(self._cloud_bucket, path)
    return k.generate_url(expires_in)

  def get_direct_upload_url(self, path, mime_type, requires_cors=True):
    self._initialize_cloud_conn()
    path = self._init_path(path)
    key = self._key_class(self._cloud_bucket, path)
    url = key.generate_url(300, 'PUT', headers={'Content-Type': mime_type}, encrypt_key=True)
    return url
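
  # A rough illustration of handing a client a time-limited signed download URL
  # (the path below is a placeholder):
  #
  #   signed_url = storage.get_direct_download_url('some/path', expires_in=120)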

  def stream_read(self, path):
    self._initialize_cloud_conn()
    path = self._init_path(path)
    key = self._key_class(self._cloud_bucket, path)
    if not key.exists():
      raise IOError('No such key: \'{0}\''.format(path))
    while True:
      buf = key.read(self.buffer_size)
      if not buf:
        break
      yield buf

  def stream_read_file(self, path):
    self._initialize_cloud_conn()
    path = self._init_path(path)
    key = self._key_class(self._cloud_bucket, path)
    if not key.exists():
      raise IOError('No such key: \'{0}\''.format(path))
    return StreamReadKeyAsFile(key)

  def __initiate_multipart_upload(self, path, content_type, content_encoding):
    # The minimum part size for an S3 multipart upload is 5MB.
    self._initialize_cloud_conn()
    path = self._init_path(path)

    metadata = {}
    if content_type is not None:
      metadata['Content-Type'] = content_type

    if content_encoding is not None:
      metadata['Content-Encoding'] = content_encoding

    return self._cloud_bucket.initiate_multipart_upload(path, metadata=metadata,
                                                        **self._upload_params)

  def stream_write(self, path, fp, content_type=None, content_encoding=None):
    mp = self.__initiate_multipart_upload(path, content_type, content_encoding)
    num_part = 1
    while True:
      try:
        buf = StringIO.StringIO()
        bytes_written = self.stream_write_to_fp(fp, buf, self.upload_chunk_size)
        if bytes_written == 0:
          break

        mp.upload_part_from_file(buf, num_part)
        num_part += 1
        buf.close()
      except IOError:
        break
    mp.complete_upload()

  def list_directory(self, path=None):
    self._initialize_cloud_conn()
    path = self._init_path(path)
    if not path.endswith('/'):
      path += '/'
    ln = 0
    if self._root_path != '/':
      ln = len(self._root_path)
    exists = False
    for key in self._cloud_bucket.list(prefix=path, delimiter='/'):
      exists = True
      name = key.name
      if name.endswith('/'):
        yield name[ln:-1]
      else:
        yield name[ln:]
    if exists is False:
      # Raise in order to be compliant with the LocalStorage API, even though
      # S3 has no real concept of folders.
      raise OSError('No such directory: \'{0}\''.format(path))

  def exists(self, path):
    self._initialize_cloud_conn()
    path = self._init_path(path)
    key = self._key_class(self._cloud_bucket, path)
    return key.exists()

  def remove(self, path):
    self._initialize_cloud_conn()
    path = self._init_path(path)
    key = self._key_class(self._cloud_bucket, path)
    if key.exists():
      # It's a file
      key.delete()
      return

    # We assume it's a directory
    if not path.endswith('/'):
      path += '/'
    for key in self._cloud_bucket.list(prefix=path):
      key.delete()

  def get_checksum(self, path):
    self._initialize_cloud_conn()
    path = self._init_path(path)
    key = self._key_class(self._cloud_bucket, path)
    k = self._cloud_bucket.lookup(key)
    if k is None:
      raise IOError('No such key: \'{0}\''.format(path))
    return k.etag[1:-1][:7]

  def _rel_upload_path(self, uuid):
    return 'uploads/{0}'.format(uuid)

  def initiate_chunked_upload(self):
    self._initialize_cloud_conn()
    random_uuid = str(uuid4())
    path = self._init_path(self._rel_upload_path(random_uuid))
    mpu = self.__initiate_multipart_upload(path, content_type=None, content_encoding=None)

    metadata = {
      _MULTIPART_UPLOAD_ID_KEY: mpu.id,
      _LAST_PART_KEY: 0,
      _LAST_CHUNK_ENCOUNTERED: False,
    }

    return mpu.id, metadata

  def _get_multipart_upload_key(self, uuid, storage_metadata):
    mpu = boto.s3.multipart.MultiPartUpload(self._cloud_bucket)
    mpu.id = storage_metadata[_MULTIPART_UPLOAD_ID_KEY]
    mpu.key_name = self._init_path(self._rel_upload_path(uuid))
    return mpu

  def stream_upload_chunk(self, uuid, offset, length, in_fp, storage_metadata):
    self._initialize_cloud_conn()
    mpu = self._get_multipart_upload_key(uuid, storage_metadata)
    last_part_num = storage_metadata[_LAST_PART_KEY]

    if storage_metadata[_LAST_CHUNK_ENCOUNTERED] and length != 0:
      msg = 'Length must be at least the upload chunk size: %s' % self.upload_chunk_size
      raise InvalidChunkException(msg)

    part_num = last_part_num + 1
    mpu.upload_part_from_file(in_fp, part_num, size=length)

    new_metadata = {
      _MULTIPART_UPLOAD_ID_KEY: mpu.id,
      _LAST_PART_KEY: part_num,
      _LAST_CHUNK_ENCOUNTERED: True,
    }

    return length, new_metadata

  def complete_chunked_upload(self, uuid, final_path, storage_metadata):
    mpu = self._get_multipart_upload_key(uuid, storage_metadata)
    mpu.complete_upload()

  def cancel_chunked_upload(self, uuid, storage_metadata):
    mpu = self._get_multipart_upload_key(uuid, storage_metadata)
    mpu.cancel_upload()
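
# A rough sketch of the chunked-upload flow against any _CloudStorage subclass
# (`storage`, `chunks`, and the final path below are illustrative placeholders):
#
#   upload_id, metadata = storage.initiate_chunked_upload()
#   for offset, length, chunk_fp in chunks:
#     # every chunk except the last is expected to be at least upload_chunk_size
#     length, metadata = storage.stream_upload_chunk(upload_id, offset, length, chunk_fp, metadata)
#   storage.complete_chunked_upload(upload_id, 'final/path', metadata)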


class S3Storage(_CloudStorage):
  def __init__(self, storage_path, s3_access_key, s3_secret_key, s3_bucket):
    upload_params = {
      'encrypt_key': True,
    }
    connect_kwargs = {}
    super(S3Storage, self).__init__(boto.s3.connection.S3Connection, boto.s3.key.Key,
                                    connect_kwargs, upload_params, storage_path, s3_access_key,
                                    s3_secret_key, s3_bucket)

  def setup(self):
    self.get_cloud_bucket().set_cors_xml("""<?xml version="1.0" encoding="UTF-8"?>
      <CORSConfiguration xmlns="http://s3.amazonaws.com/doc/2006-03-01/">
          <CORSRule>
              <AllowedOrigin>*</AllowedOrigin>
              <AllowedMethod>GET</AllowedMethod>
              <MaxAgeSeconds>3000</MaxAgeSeconds>
              <AllowedHeader>Authorization</AllowedHeader>
          </CORSRule>
          <CORSRule>
              <AllowedOrigin>*</AllowedOrigin>
              <AllowedMethod>PUT</AllowedMethod>
              <MaxAgeSeconds>3000</MaxAgeSeconds>
              <AllowedHeader>Content-Type</AllowedHeader>
              <AllowedHeader>x-amz-acl</AllowedHeader>
              <AllowedHeader>origin</AllowedHeader>
          </CORSRule>
      </CORSConfiguration>""")
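
# A minimal, hypothetical construction of the S3 engine (the credentials, bucket,
# and paths below are placeholders, not real configuration):
#
#   storage = S3Storage('/registry', 'AKIA...', 'secret', 'my-bucket')
#   storage.setup()                      # installs the CORS policy on the bucket
#   storage.put_content('hello.txt', 'hello world')
#   print storage.get_content('hello.txt')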


class GoogleCloudStorage(_CloudStorage):
  def __init__(self, storage_path, access_key, secret_key, bucket_name):
    upload_params = {}
    connect_kwargs = {}
    super(GoogleCloudStorage, self).__init__(boto.gs.connection.GSConnection, boto.gs.key.Key,
                                             connect_kwargs, upload_params, storage_path,
                                             access_key, secret_key, bucket_name)

  def setup(self):
    self.get_cloud_bucket().set_cors_xml("""<?xml version="1.0" encoding="UTF-8"?>
      <CorsConfig>
        <Cors>
          <Origins>
            <Origin>*</Origin>
          </Origins>
          <Methods>
            <Method>GET</Method>
            <Method>PUT</Method>
          </Methods>
          <ResponseHeaders>
            <ResponseHeader>Content-Type</ResponseHeader>
          </ResponseHeaders>
          <MaxAgeSec>3000</MaxAgeSec>
        </Cors>
      </CorsConfig>""")

  def stream_write(self, path, fp, content_type=None, content_encoding=None):
    # Stream the contents directly onto the key rather than going through the
    # base class's S3-style multipart upload.
    self._initialize_cloud_conn()
    path = self._init_path(path)
    key = self._key_class(self._cloud_bucket, path)
    if content_type is not None:
      key.set_metadata('Content-Type', content_type)

    if content_encoding is not None:
      key.set_metadata('Content-Encoding', content_encoding)

    key.set_contents_from_stream(fp)


class RadosGWStorage(_CloudStorage):
  def __init__(self, hostname, is_secure, storage_path, access_key, secret_key, bucket_name):
    upload_params = {}
    connect_kwargs = {
      'host': hostname,
      'is_secure': is_secure,
      'calling_format': boto.s3.connection.OrdinaryCallingFormat(),
    }
    super(RadosGWStorage, self).__init__(boto.s3.connection.S3Connection, boto.s3.key.Key,
                                         connect_kwargs, upload_params, storage_path, access_key,
                                         secret_key, bucket_name)

  # TODO remove when radosgw supports cors: http://tracker.ceph.com/issues/8718#change-38624
  def get_direct_download_url(self, path, expires_in=60, requires_cors=False):
    if requires_cors:
      return None

    return super(RadosGWStorage, self).get_direct_download_url(path, expires_in, requires_cors)

  # TODO remove when radosgw supports cors: http://tracker.ceph.com/issues/8718#change-38624
  def get_direct_upload_url(self, path, mime_type, requires_cors=True):
    if requires_cors:
      return None

    return super(RadosGWStorage, self).get_direct_upload_url(path, mime_type, requires_cors)
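
# A hypothetical RadosGW configuration (the hostname, credentials, and bucket are
# placeholders); note that direct download/upload URLs are disabled whenever CORS
# would be required:
#
#   storage = RadosGWStorage('ceph.internal.example', True, '/registry',
#                            'access-key', 'secret-key', 'registry-bucket')
#   url = storage.get_direct_download_url('some/path', requires_cors=True)   # returns None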