From 9eccdb7696d7b44c4bc9330e246e7e3323d697e5 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Mon, 8 Sep 2014 12:00:20 -0400 Subject: [PATCH 01/10] Fix NPE --- static/js/controllers.js | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/static/js/controllers.js b/static/js/controllers.js index e4e364c87..9131a0140 100644 --- a/static/js/controllers.js +++ b/static/js/controllers.js @@ -1647,14 +1647,17 @@ function UserAdminCtrl($scope, $timeout, $location, ApiService, PlanService, Use if ($scope.cuser.logins) { for (var i = 0; i < $scope.cuser.logins.length; i++) { - if ($scope.cuser.logins[i].service == 'github') { + var login = $scope.cuser.logins[i]; + login.metadata = login.metadata || {}; + + if (login.service == 'github') { $scope.hasGithubLogin = true; - $scope.githubLogin = $scope.cuser.logins[i].metadata['service_username']; + $scope.githubLogin = login.metadata['service_username']; } - if ($scope.cuser.logins[i].service == 'google') { + if (login.service == 'google') { $scope.hasGoogleLogin = true; - $scope.googleLogin = $scope.cuser.logins[i].metadata['service_username']; + $scope.googleLogin = login.metadata['service_username']; } } } From dd4037e3243f0c59ff79f134be2110e5a2658957 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Mon, 8 Sep 2014 12:17:00 -0400 Subject: [PATCH 02/10] Allow github trigger setup folder paths to be specified even if a Dockerfile is not found --- static/directives/dropdown-select.html | 2 +- static/directives/trigger-setup-github.html | 3 ++- static/js/app.js | 3 +++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/static/directives/dropdown-select.html b/static/directives/dropdown-select.html index c1157e3d0..69404e161 100644 --- a/static/directives/dropdown-select.html +++ b/static/directives/dropdown-select.html @@ -2,7 +2,7 @@
+ ng-readonly="!allowCustomInput">
From 29d40db5ea532e069ec8ceb8c9cb1d38d30724f3 Mon Sep 17 00:00:00 2001 From: Jake Moshenko Date: Tue, 9 Sep 2014 15:54:03 -0400 Subject: [PATCH 04/10] Add a new RadosGW storage engine. Allow engines to distinguish not only between those that can support direct uploads and downloads, but those that support doing it through the browser. Rename resumeable->resumable. --- app.py | 2 +- data/userfiles.py | 210 +++++++++------------------------- endpoints/api/build.py | 4 +- endpoints/registry.py | 6 +- storage/__init__.py | 3 +- storage/basestorage.py | 10 +- storage/cloud.py | 65 +++++++++-- storage/distributedstorage.py | 4 +- storage/fakestorage.py | 3 + storage/local.py | 12 +- test/testconfig.py | 2 +- workers/dockerfilebuild.py | 2 +- 12 files changed, 147 insertions(+), 176 deletions(-) diff --git a/app.py b/app.py index 81c59a30c..bcc4e86d7 100644 --- a/app.py +++ b/app.py @@ -88,7 +88,7 @@ Principal(app, use_sessions=False) login_manager = LoginManager(app) mail = Mail(app) storage = Storage(app) -userfiles = Userfiles(app) +userfiles = Userfiles(app, storage) analytics = Analytics(app) billing = Billing(app) sentry = Sentry(app) diff --git a/data/userfiles.py b/data/userfiles.py index 79fbcb507..e6d21c1c1 100644 --- a/data/userfiles.py +++ b/data/userfiles.py @@ -1,110 +1,31 @@ -import boto import os import logging -import hashlib import magic -from boto.s3.key import Key from uuid import uuid4 from flask import url_for, request, send_file, make_response, abort from flask.views import View - logger = logging.getLogger(__name__) -class FakeUserfiles(object): - def prepare_for_drop(self, mime_type): - return ('http://fake/url', uuid4()) - - def store_file(self, file_like_obj, content_type): - raise NotImplementedError() - - def get_file_url(self, file_id, expires_in=300): - return ('http://fake/url') - - def get_file_checksum(self, file_id): - return 'abcdefg' - - -class S3FileWriteException(Exception): - pass - - -class S3Userfiles(object): - def __init__(self, path, s3_access_key, s3_secret_key, bucket_name): - self._initialized = False - self._bucket_name = bucket_name - self._access_key = s3_access_key - self._secret_key = s3_secret_key - self._prefix = path - self._s3_conn = None - self._bucket = None - - def _initialize_s3(self): - if not self._initialized: - self._s3_conn = boto.connect_s3(self._access_key, self._secret_key) - self._bucket = self._s3_conn.get_bucket(self._bucket_name) - self._initialized = True - - def prepare_for_drop(self, mime_type): - """ Returns a signed URL to upload a file to our bucket. 
""" - self._initialize_s3() - logger.debug('Requested upload url with content type: %s' % mime_type) - file_id = str(uuid4()) - full_key = os.path.join(self._prefix, file_id) - k = Key(self._bucket, full_key) - url = k.generate_url(300, 'PUT', headers={'Content-Type': mime_type}, - encrypt_key=True) - return (url, file_id) - - def store_file(self, file_like_obj, content_type): - self._initialize_s3() - file_id = str(uuid4()) - full_key = os.path.join(self._prefix, file_id) - k = Key(self._bucket, full_key) - logger.debug('Setting s3 content type to: %s' % content_type) - k.set_metadata('Content-Type', content_type) - bytes_written = k.set_contents_from_file(file_like_obj, encrypt_key=True, - rewind=True) - - if bytes_written == 0: - raise S3FileWriteException('Unable to write file to S3') - - return file_id - - def get_file_url(self, file_id, expires_in=300, mime_type=None): - self._initialize_s3() - full_key = os.path.join(self._prefix, file_id) - k = Key(self._bucket, full_key) - headers = None - if mime_type: - headers={'Content-Type': mime_type} - - return k.generate_url(expires_in, headers=headers) - - def get_file_checksum(self, file_id): - self._initialize_s3() - full_key = os.path.join(self._prefix, file_id) - k = self._bucket.lookup(full_key) - return k.etag[1:-1][:7] - - class UserfilesHandlers(View): methods = ['GET', 'PUT'] - def __init__(self, local_userfiles): - self._userfiles = local_userfiles + def __init__(self, distributed_storage, location, files): + self._storage = distributed_storage + self._files = files + self._locations = {location} self._magic = magic.Magic(mime=True) def get(self, file_id): - path = self._userfiles.file_path(file_id) - if not os.path.exists(path): + path = self._files.get_file_id_path(file_id) + try: + file_stream = self._storage.stream_read_file(self._locations, path) + return send_file(file_stream) + except IOError: abort(404) - logger.debug('Sending path: %s' % path) - return send_file(path, mimetype=self._magic.from_file(path)) - def put(self, file_id): input_stream = request.stream if request.headers.get('transfer-encoding') == 'chunked': @@ -112,7 +33,8 @@ class UserfilesHandlers(View): # encoding (Gunicorn) input_stream = request.environ['wsgi.input'] - self._userfiles.store_stream(input_stream, file_id) + path = self._files.get_file_id_path(file_id) + self._storage.stream_write(self._locations, path, input_stream) return make_response('Okay') @@ -123,99 +45,79 @@ class UserfilesHandlers(View): return self.put(file_id) -class LocalUserfiles(object): - def __init__(self, app, path): - self._root_path = path - self._buffer_size = 64 * 1024 # 64 KB +class DelegateUserfiles(object): + def __init__(self, app, distributed_storage, location, path, handler_name): self._app = app + self._storage = distributed_storage + self._locations = {location} + self._prefix = path + self._handler_name = handler_name def _build_url_adapter(self): return self._app.url_map.bind(self._app.config['SERVER_HOSTNAME'], script_name=self._app.config['APPLICATION_ROOT'] or '/', url_scheme=self._app.config['PREFERRED_URL_SCHEME']) - def prepare_for_drop(self, mime_type): + def get_file_id_path(self, file_id): + return os.path.join(self._prefix, file_id) + + def prepare_for_drop(self, mime_type, requires_cors=True): + """ Returns a signed URL to upload a file to our bucket. 
""" + logger.debug('Requested upload url with content type: %s' % mime_type) file_id = str(uuid4()) - with self._app.app_context() as ctx: - ctx.url_adapter = self._build_url_adapter() - return (url_for('userfiles_handlers', file_id=file_id, _external=True), file_id) + path = self.get_file_id_path(file_id) + url = self._storage.get_direct_upload_url(self._locations, path, mime_type, requires_cors) - def file_path(self, file_id): - if '..' in file_id or file_id.startswith('/'): - raise RuntimeError('Invalid Filename') - return os.path.join(self._root_path, file_id) + if url is None: + with self._app.app_context() as ctx: + ctx.url_adapter = self._build_url_adapter() + return (url_for(self._handler_name, file_id=file_id, _external=True), file_id) - def store_stream(self, stream, file_id): - path = self.file_path(file_id) - dirname = os.path.dirname(path) - if not os.path.exists(dirname): - os.makedirs(dirname) - - with open(path, 'w') as to_write: - while True: - try: - buf = stream.read(self._buffer_size) - if not buf: - break - to_write.write(buf) - except IOError: - break + return (url, file_id) def store_file(self, file_like_obj, content_type): file_id = str(uuid4()) - - # Rewind the file to match what s3 does - file_like_obj.seek(0, os.SEEK_SET) - - self.store_stream(file_like_obj, file_id) + path = self.get_file_id_path(file_id) + self._storage.stream_write(self._locations, path, file_like_obj) return file_id - def get_file_url(self, file_id, expires_in=300): - with self._app.app_context() as ctx: - ctx.url_adapter = self._build_url_adapter() - return url_for('userfiles_handlers', file_id=file_id, _external=True) + def get_file_url(self, file_id, expires_in=300, requires_cors=False): + path = self.get_file_id_path(file_id) + url = self._storage.get_direct_download_url(self._locations, path, expires_in, requires_cors) + + if url is None: + with self._app.app_context() as ctx: + ctx.url_adapter = self._build_url_adapter() + return url_for(self._handler_name, file_id=file_id, _external=True) + + return url def get_file_checksum(self, file_id): - path = self.file_path(file_id) - sha_hash = hashlib.sha256() - with open(path, 'r') as to_hash: - while True: - buf = to_hash.read(self._buffer_size) - if not buf: - break - sha_hash.update(buf) - return sha_hash.hexdigest()[:7] + path = self.get_file_id_path(file_id) + return self._storage.get_checksum(self._locations, path) class Userfiles(object): - def __init__(self, app=None): + def __init__(self, app=None, distributed_storage=None): self.app = app if app is not None: - self.state = self.init_app(app) + self.state = self.init_app(app, distributed_storage) else: self.state = None - def init_app(self, app): - storage_type = app.config.get('USERFILES_TYPE', 'LocalUserfiles') - path = app.config.get('USERFILES_PATH', '') + def init_app(self, app, distributed_storage): + location = app.config.get('USERFILES_LOCATION') + path = app.config.get('USERFILES_PATH', None) - if storage_type == 'LocalUserfiles': - userfiles = LocalUserfiles(app, path) - app.add_url_rule('/userfiles/', - view_func=UserfilesHandlers.as_view('userfiles_handlers', - local_userfiles=userfiles)) + handler_name = 'userfiles_handlers' - elif storage_type == 'S3Userfiles': - access_key = app.config.get('USERFILES_AWS_ACCESS_KEY', '') - secret_key = app.config.get('USERFILES_AWS_SECRET_KEY', '') - bucket = app.config.get('USERFILES_S3_BUCKET', '') - userfiles = S3Userfiles(path, access_key, secret_key, bucket) + userfiles = DelegateUserfiles(app, distributed_storage, location, path, 
handler_name) - elif storage_type == 'FakeUserfiles': - userfiles = FakeUserfiles() - - else: - raise RuntimeError('Unknown userfiles type: %s' % storage_type) + app.add_url_rule('/userfiles/', + view_func=UserfilesHandlers.as_view(handler_name, + distributed_storage=distributed_storage, + location=location, + files=userfiles)) # register extension with app app.extensions = getattr(app, 'extensions', {}) diff --git a/endpoints/api/build.py b/endpoints/api/build.py index 21d554069..74677fadb 100644 --- a/endpoints/api/build.py +++ b/endpoints/api/build.py @@ -80,7 +80,7 @@ def build_status_view(build_obj, can_write=False): } if can_write: - resp['archive_url'] = user_files.get_file_url(build_obj.resource_key) + resp['archive_url'] = user_files.get_file_url(build_obj.resource_key, requires_cors=True) return resp @@ -257,7 +257,7 @@ class FileDropResource(ApiResource): def post(self): """ Request a URL to which a file may be uploaded. """ mime_type = request.get_json()['mimeType'] - (url, file_id) = user_files.prepare_for_drop(mime_type) + (url, file_id) = user_files.prepare_for_drop(mime_type, requires_cors=True) return { 'url': url, 'file_id': str(file_id), diff --git a/endpoints/registry.py b/endpoints/registry.py index 72633939e..94719905a 100644 --- a/endpoints/registry.py +++ b/endpoints/registry.py @@ -110,10 +110,10 @@ def head_image_layer(namespace, repository, image_id, headers): extra_headers = {} - # Add the Accept-Ranges header if the storage engine supports resumeable + # Add the Accept-Ranges header if the storage engine supports resumable # downloads. - if store.get_supports_resumeable_downloads(repo_image.storage.locations): - profile.debug('Storage supports resumeable downloads') + if store.get_supports_resumable_downloads(repo_image.storage.locations): + profile.debug('Storage supports resumable downloads') extra_headers['Accept-Ranges'] = 'bytes' resp = make_response('') diff --git a/storage/__init__.py b/storage/__init__.py index 6700dab0b..4d1134d4b 100644 --- a/storage/__init__.py +++ b/storage/__init__.py @@ -1,5 +1,5 @@ from storage.local import LocalStorage -from storage.cloud import S3Storage, GoogleCloudStorage +from storage.cloud import S3Storage, GoogleCloudStorage, RadosGWStorage from storage.fakestorage import FakeStorage from storage.distributedstorage import DistributedStorage @@ -8,6 +8,7 @@ STORAGE_DRIVER_CLASSES = { 'LocalStorage': LocalStorage, 'S3Storage': S3Storage, 'GoogleCloudStorage': GoogleCloudStorage, + 'RadosGWStorage': RadosGWStorage, } diff --git a/storage/basestorage.py b/storage/basestorage.py index 2d3727a5b..aa6434b8e 100644 --- a/storage/basestorage.py +++ b/storage/basestorage.py @@ -54,10 +54,13 @@ class BaseStorage(StoragePaths): # Set the IO buffer to 64kB buffer_size = 64 * 1024 - def get_direct_download_url(self, path, expires_in=60): + def get_direct_download_url(self, path, expires_in=60, requires_cors=False): return None - def get_supports_resumeable_downloads(self): + def get_direct_upload_url(self, path, mime_type, requires_cors=True): + return None + + def get_supports_resumable_downloads(self): return False def get_content(self, path): @@ -83,3 +86,6 @@ class BaseStorage(StoragePaths): def remove(self, path): raise NotImplementedError + + def get_checksum(self, path): + raise NotImplementedError \ No newline at end of file diff --git a/storage/cloud.py b/storage/cloud.py index d64415410..a576a6401 100644 --- a/storage/cloud.py +++ b/storage/cloud.py @@ -35,8 +35,8 @@ class StreamReadKeyAsFile(object): class 
_CloudStorage(BaseStorage): - def __init__(self, connection_class, key_class, upload_params, storage_path, access_key, - secret_key, bucket_name): + def __init__(self, connection_class, key_class, connect_kwargs, upload_params, storage_path, + access_key, secret_key, bucket_name): self._initialized = False self._bucket_name = bucket_name self._access_key = access_key @@ -45,12 +45,14 @@ class _CloudStorage(BaseStorage): self._connection_class = connection_class self._key_class = key_class self._upload_params = upload_params + self._connect_kwargs = connect_kwargs self._cloud_conn = None self._cloud_bucket = None def _initialize_cloud_conn(self): if not self._initialized: - self._cloud_conn = self._connection_class(self._access_key, self._secret_key) + self._cloud_conn = self._connection_class(self._access_key, self._secret_key, + **self._connect_kwargs) self._cloud_bucket = self._cloud_conn.get_bucket(self._bucket_name) self._initialized = True @@ -87,15 +89,22 @@ class _CloudStorage(BaseStorage): key.set_contents_from_string(content, **self._upload_params) return path - def get_supports_resumeable_downloads(self): + def get_supports_resumable_downloads(self): return True - def get_direct_download_url(self, path, expires_in=60): + def get_direct_download_url(self, path, expires_in=60, requires_cors=False): self._initialize_cloud_conn() path = self._init_path(path) k = self._key_class(self._cloud_bucket, path) return k.generate_url(expires_in) + def get_direct_upload_url(self, path, mime_type, requires_cors=True): + self._initialize_cloud_conn() + path = self._init_path(path) + key = self._key_class(self._cloud_bucket, path) + url = key.generate_url(300, 'PUT', headers={'Content-Type': mime_type}, encrypt_key=True) + return url + def stream_read(self, path): self._initialize_cloud_conn() path = self._init_path(path) @@ -179,21 +188,32 @@ class _CloudStorage(BaseStorage): for key in self._cloud_bucket.list(prefix=path): key.delete() + def get_checksum(self, path): + self._initialize_cloud_conn() + path = self._init_path(path) + key = self._key_class(self._cloud_bucket, path) + k = self._cloud_bucket.lookup(key) + return k.etag[1:-1][:7] + class S3Storage(_CloudStorage): def __init__(self, storage_path, s3_access_key, s3_secret_key, s3_bucket): upload_params = { 'encrypt_key': True, } + connect_kwargs = {} super(S3Storage, self).__init__(boto.s3.connection.S3Connection, boto.s3.key.Key, - upload_params, storage_path, s3_access_key, s3_secret_key, - s3_bucket) + connect_kwargs, upload_params, storage_path, s3_access_key, + s3_secret_key, s3_bucket) class GoogleCloudStorage(_CloudStorage): def __init__(self, storage_path, access_key, secret_key, bucket_name): - super(GoogleCloudStorage, self).__init__(boto.gs.connection.GSConnection, boto.gs.key.Key, {}, - storage_path, access_key, secret_key, bucket_name) + upload_params = {} + connect_kwargs = {} + super(GoogleCloudStorage, self).__init__(boto.gs.connection.GSConnection, boto.gs.key.Key, + connect_kwargs, upload_params, storage_path, + access_key, secret_key, bucket_name) def stream_write(self, path, fp): # Minimum size of upload part size on S3 is 5MB @@ -201,3 +221,30 @@ class GoogleCloudStorage(_CloudStorage): path = self._init_path(path) key = self._key_class(self._cloud_bucket, path) key.set_contents_from_stream(fp) + + +class RadosGWStorage(_CloudStorage): + def __init__(self, hostname, is_secure, storage_path, access_key, secret_key, bucket_name): + upload_params = {} + connect_kwargs = { + 'host': hostname, + 'is_secure': is_secure, + 
'calling_format': boto.s3.connection.OrdinaryCallingFormat(), + } + super(RadosGWStorage, self).__init__(boto.s3.connection.S3Connection, boto.s3.key.Key, + connect_kwargs, upload_params, storage_path, access_key, + secret_key, bucket_name) + + # TODO remove when radosgw supports cors: http://tracker.ceph.com/issues/8718#change-38624 + def get_direct_download_url(self, path, expires_in=60, requires_cors=False): + if requires_cors: + return None + + return super(RadosGWStorage, self).get_direct_download_url(path, expires_in, requires_cors) + + # TODO remove when radosgw supports cors: http://tracker.ceph.com/issues/8718#change-38624 + def get_direct_upload_url(self, path, mime_type, requires_cors=True): + if requires_cors: + return None + + return super(RadosGWStorage, self).get_direct_upload_url(path, mime_type, requires_cors) diff --git a/storage/distributedstorage.py b/storage/distributedstorage.py index 9941f0fa5..1544d9725 100644 --- a/storage/distributedstorage.py +++ b/storage/distributedstorage.py @@ -31,6 +31,7 @@ class DistributedStorage(StoragePaths): self.preferred_locations = list(preferred_locations) get_direct_download_url = _location_aware(BaseStorage.get_direct_download_url) + get_direct_upload_url = _location_aware(BaseStorage.get_direct_upload_url) get_content = _location_aware(BaseStorage.get_content) put_content = _location_aware(BaseStorage.put_content) stream_read = _location_aware(BaseStorage.stream_read) @@ -39,4 +40,5 @@ class DistributedStorage(StoragePaths): list_directory = _location_aware(BaseStorage.list_directory) exists = _location_aware(BaseStorage.exists) remove = _location_aware(BaseStorage.remove) - get_supports_resumeable_downloads = _location_aware(BaseStorage.get_supports_resumeable_downloads) + get_checksum = _location_aware(BaseStorage.get_checksum) + get_supports_resumable_downloads = _location_aware(BaseStorage.get_supports_resumable_downloads) diff --git a/storage/fakestorage.py b/storage/fakestorage.py index 597d22af4..5761acf2f 100644 --- a/storage/fakestorage.py +++ b/storage/fakestorage.py @@ -22,3 +22,6 @@ class FakeStorage(BaseStorage): def exists(self, path): return False + + def get_checksum(self, path): + return 'abcdefg' \ No newline at end of file diff --git a/storage/local.py b/storage/local.py index 361d76403..55e79077b 100644 --- a/storage/local.py +++ b/storage/local.py @@ -1,4 +1,3 @@ - import os import shutil @@ -80,3 +79,14 @@ class LocalStorage(BaseStorage): os.remove(path) except OSError: pass + + def get_checksum(self, path): + path = self._init_path(path) + sha_hash = hashlib.sha256() + with open(path, 'r') as to_hash: + while True: + buf = to_hash.read(self.buffer_size) + if not buf: + break + sha_hash.update(buf) + return sha_hash.hexdigest()[:7] diff --git a/test/testconfig.py b/test/testconfig.py index c74e5712a..35c96a803 100644 --- a/test/testconfig.py +++ b/test/testconfig.py @@ -30,7 +30,7 @@ class TestConfig(DefaultConfig): BUILDLOGS_MODULE_AND_CLASS = ('test.testlogs', 'testlogs.TestBuildLogs') BUILDLOGS_OPTIONS = ['devtable', 'building', 'deadbeef-dead-beef-dead-beefdeadbeef', False] - USERFILES_TYPE = 'FakeUserfiles' + USERFILES_LOCATION = 'local_us' FEATURE_SUPER_USERS = True FEATURE_BILLING = True diff --git a/workers/dockerfilebuild.py b/workers/dockerfilebuild.py index b373a00a9..143a27103 100644 --- a/workers/dockerfilebuild.py +++ b/workers/dockerfilebuild.py @@ -495,7 +495,7 @@ class DockerfileBuildWorker(Worker): job_config = json.loads(repository_build.job_config) - resource_url = 
user_files.get_file_url(repository_build.resource_key) + resource_url = user_files.get_file_url(repository_build.resource_key, requires_cors=False) tag_names = job_config['docker_tags'] build_subdir = job_config['build_subdir'] repo = job_config['repository'] From 756e8ec84868a1f678b3efab5a18def9bc97ef9c Mon Sep 17 00:00:00 2001 From: Jake Moshenko Date: Tue, 9 Sep 2014 16:52:53 -0400 Subject: [PATCH 05/10] Send the content type through to the cloud engines. --- data/userfiles.py | 6 ++++-- storage/basestorage.py | 2 +- storage/cloud.py | 16 +++++++++++++--- storage/fakestorage.py | 2 +- storage/local.py | 2 +- 5 files changed, 20 insertions(+), 8 deletions(-) diff --git a/data/userfiles.py b/data/userfiles.py index e6d21c1c1..c3113802f 100644 --- a/data/userfiles.py +++ b/data/userfiles.py @@ -33,8 +33,10 @@ class UserfilesHandlers(View): # encoding (Gunicorn) input_stream = request.environ['wsgi.input'] + c_type = request.headers.get('Content-Type', None) + path = self._files.get_file_id_path(file_id) - self._storage.stream_write(self._locations, path, input_stream) + self._storage.stream_write(self._locations, path, input_stream, c_type) return make_response('Okay') @@ -78,7 +80,7 @@ class DelegateUserfiles(object): def store_file(self, file_like_obj, content_type): file_id = str(uuid4()) path = self.get_file_id_path(file_id) - self._storage.stream_write(self._locations, path, file_like_obj) + self._storage.stream_write(self._locations, path, file_like_obj, content_type) return file_id def get_file_url(self, file_id, expires_in=300, requires_cors=False): diff --git a/storage/basestorage.py b/storage/basestorage.py index aa6434b8e..78d49aa1f 100644 --- a/storage/basestorage.py +++ b/storage/basestorage.py @@ -75,7 +75,7 @@ class BaseStorage(StoragePaths): def stream_read_file(self, path): raise NotImplementedError - def stream_write(self, path, fp): + def stream_write(self, path, fp, content_type=None): raise NotImplementedError def list_directory(self, path=None): diff --git a/storage/cloud.py b/storage/cloud.py index a576a6401..28325e187 100644 --- a/storage/cloud.py +++ b/storage/cloud.py @@ -125,14 +125,20 @@ class _CloudStorage(BaseStorage): raise IOError('No such key: \'{0}\''.format(path)) return StreamReadKeyAsFile(key) - def stream_write(self, path, fp): + def stream_write(self, path, fp, content_type=None): # Minimum size of upload part size on S3 is 5MB self._initialize_cloud_conn() buffer_size = 5 * 1024 * 1024 if self.buffer_size > buffer_size: buffer_size = self.buffer_size path = self._init_path(path) - mp = self._cloud_bucket.initiate_multipart_upload(path, **self._upload_params) + + metadata = {} + if content_type is not None: + metadata['Content-Type'] = content_type + + mp = self._cloud_bucket.initiate_multipart_upload(path, metadata=metadata, + **self._upload_params) num_part = 1 while True: try: @@ -215,11 +221,15 @@ class GoogleCloudStorage(_CloudStorage): connect_kwargs, upload_params, storage_path, access_key, secret_key, bucket_name) - def stream_write(self, path, fp): + def stream_write(self, path, fp, content_type=None): # Minimum size of upload part size on S3 is 5MB self._initialize_cloud_conn() path = self._init_path(path) key = self._key_class(self._cloud_bucket, path) + + if content_type is not None: + key.set_metadata('Content-Type', content_type) + key.set_contents_from_stream(fp) diff --git a/storage/fakestorage.py b/storage/fakestorage.py index 5761acf2f..232f5af24 100644 --- a/storage/fakestorage.py +++ b/storage/fakestorage.py @@ -14,7 +14,7 @@ class 
FakeStorage(BaseStorage): def stream_read(self, path): yield '' - def stream_write(self, path, fp): + def stream_write(self, path, fp, content_type=None): pass def remove(self, path): diff --git a/storage/local.py b/storage/local.py index 55e79077b..a800645a8 100644 --- a/storage/local.py +++ b/storage/local.py @@ -41,7 +41,7 @@ class LocalStorage(BaseStorage): path = self._init_path(path) return open(path, mode='rb') - def stream_write(self, path, fp): + def stream_write(self, path, fp, content_type=None): # Size is mandatory path = self._init_path(path, create=True) with open(path, mode='wb') as f: From c9e16487817126b04d9b64085ed46a7e9ed35632 Mon Sep 17 00:00:00 2001 From: Jake Moshenko Date: Tue, 9 Sep 2014 18:30:14 -0400 Subject: [PATCH 06/10] Small fixes to bugs in the streaming handler for use with magic and radosgw. --- config.py | 8 ++++---- data/userfiles.py | 6 +++++- storage/cloud.py | 20 +++++++++++++------- storage/local.py | 4 +++- 4 files changed, 25 insertions(+), 13 deletions(-) diff --git a/config.py b/config.py index f797cb36a..ffcf7f79e 100644 --- a/config.py +++ b/config.py @@ -89,10 +89,6 @@ class DefaultConfig(object): # Stripe config BILLING_TYPE = 'FakeStripe' - # Userfiles - USERFILES_TYPE = 'LocalUserfiles' - USERFILES_PATH = 'test/data/registry/userfiles' - # Analytics ANALYTICS_TYPE = 'FakeAnalytics' @@ -172,3 +168,7 @@ class DefaultConfig(object): } DISTRIBUTED_STORAGE_PREFERENCE = ['local_us'] + + # Userfiles + USERFILES_LOCATION = 'local_us' + USERFILES_PATH = 'userfiles/' diff --git a/data/userfiles.py b/data/userfiles.py index c3113802f..7ee7726e4 100644 --- a/data/userfiles.py +++ b/data/userfiles.py @@ -5,6 +5,8 @@ import magic from uuid import uuid4 from flask import url_for, request, send_file, make_response, abort from flask.views import View +from io import BufferedReader + logger = logging.getLogger(__name__) @@ -22,7 +24,9 @@ class UserfilesHandlers(View): path = self._files.get_file_id_path(file_id) try: file_stream = self._storage.stream_read_file(self._locations, path) - return send_file(file_stream) + buffered = BufferedReader(file_stream) + file_header_bytes = buffered.peek(1024) + return send_file(buffered, mimetype=self._magic.from_buffer(file_header_bytes)) except IOError: abort(404) diff --git a/storage/cloud.py b/storage/cloud.py index 28325e187..0d2028e1b 100644 --- a/storage/cloud.py +++ b/storage/cloud.py @@ -7,23 +7,19 @@ import boto.gs.connection import boto.s3.key import boto.gs.key +from io import UnsupportedOperation, BufferedIOBase + from storage.basestorage import BaseStorage logger = logging.getLogger(__name__) -class StreamReadKeyAsFile(object): +class StreamReadKeyAsFile(BufferedIOBase): def __init__(self, key): self._key = key self._finished = False - def __enter__(self): - return self - - def __exit__(self, type, value, tb): - self._key.close(fast=True) - def read(self, amt=None): if self._finished: return None @@ -33,6 +29,16 @@ class StreamReadKeyAsFile(object): self._finished = True return resp + def readable(self): + return True + + @property + def closed(self): + return self._key.closed + + def close(self): + self._key.close(fast=True) + class _CloudStorage(BaseStorage): def __init__(self, connection_class, key_class, connect_kwargs, upload_params, storage_path, diff --git a/storage/local.py b/storage/local.py index a800645a8..987431e33 100644 --- a/storage/local.py +++ b/storage/local.py @@ -1,5 +1,7 @@ import os import shutil +import hashlib +import io from storage.basestorage import BaseStorage @@ -39,7 +41,7 @@ 
class LocalStorage(BaseStorage): def stream_read_file(self, path): path = self._init_path(path) - return open(path, mode='rb') + return io.open(path, mode='rb') def stream_write(self, path, fp, content_type=None): # Size is mandatory From 548f855f71f89c5a375584386f006181ae014ac8 Mon Sep 17 00:00:00 2001 From: Jake Moshenko Date: Tue, 9 Sep 2014 22:28:25 -0400 Subject: [PATCH 07/10] Use the pure python io module to avoid some interaction between gunicorn, wsgi, and bufferedreader that prevents gunicorn from properly sending the files. --- data/userfiles.py | 2 +- storage/cloud.py | 7 ++----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/data/userfiles.py b/data/userfiles.py index 7ee7726e4..950c4dd60 100644 --- a/data/userfiles.py +++ b/data/userfiles.py @@ -5,7 +5,7 @@ import magic from uuid import uuid4 from flask import url_for, request, send_file, make_response, abort from flask.views import View -from io import BufferedReader +from _pyio import BufferedReader logger = logging.getLogger(__name__) diff --git a/storage/cloud.py b/storage/cloud.py index 0d2028e1b..f7d922d6c 100644 --- a/storage/cloud.py +++ b/storage/cloud.py @@ -7,7 +7,7 @@ import boto.gs.connection import boto.s3.key import boto.gs.key -from io import UnsupportedOperation, BufferedIOBase +from io import BufferedIOBase from storage.basestorage import BaseStorage @@ -18,15 +18,12 @@ logger = logging.getLogger(__name__) class StreamReadKeyAsFile(BufferedIOBase): def __init__(self, key): self._key = key - self._finished = False def read(self, amt=None): - if self._finished: + if self.closed: return None resp = self._key.read(amt) - if not resp: - self._finished = True return resp def readable(self): From 11b690cba93089cef2bd16eccfccf1e97434d8f0 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Wed, 10 Sep 2014 14:17:39 -0400 Subject: [PATCH 08/10] Fix slack help url --- static/js/app.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/static/js/app.js b/static/js/app.js index 0551df2dc..26b8a4be1 100644 --- a/static/js/app.js +++ b/static/js/app.js @@ -1280,7 +1280,7 @@ quayApp = angular.module('quay', quayDependencies, function($provide, cfpLoading 'name': 'token', 'type': 'string', 'title': 'Token', - 'help_url': 'https://{subdomain}.slack.com/services/new/outgoing-webhook' + 'help_url': 'https://{subdomain}.slack.com/services/new/incoming-webhook' } ] } From 75f19dc6c6bd8e0cefa4e4a6094289c070be1486 Mon Sep 17 00:00:00 2001 From: Jake Moshenko Date: Wed, 10 Sep 2014 14:43:10 -0400 Subject: [PATCH 09/10] Refresh the version of phusion baseimage and the ubuntu package server contents. --- Dockerfile.buildworker | 4 ++-- Dockerfile.web | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Dockerfile.buildworker b/Dockerfile.buildworker index 04efe38f0..159c7867c 100644 --- a/Dockerfile.buildworker +++ b/Dockerfile.buildworker @@ -1,10 +1,10 @@ -FROM phusion/baseimage:0.9.11 +FROM phusion/baseimage:0.9.13 ENV DEBIAN_FRONTEND noninteractive ENV HOME /root # Install the dependencies. 
-RUN apt-get update # 21AUG2014 +RUN apt-get update # 10SEP2014 # New ubuntu packages should be added as their own apt-get install lines below the existing install commands RUN apt-get install -y git python-virtualenv python-dev libjpeg8 libjpeg62-dev libevent-dev gdebi-core g++ libmagic1 phantomjs nodejs npm libldap2-dev libsasl2-dev libpq-dev diff --git a/Dockerfile.web b/Dockerfile.web index e1d253632..b24694b42 100644 --- a/Dockerfile.web +++ b/Dockerfile.web @@ -1,10 +1,10 @@ -FROM phusion/baseimage:0.9.11 +FROM phusion/baseimage:0.9.13 ENV DEBIAN_FRONTEND noninteractive ENV HOME /root # Install the dependencies. -RUN apt-get update # 21AUG2014 +RUN apt-get update # 10SEP2014 # New ubuntu packages should be added as their own apt-get install lines below the existing install commands RUN apt-get install -y git python-virtualenv python-dev libjpeg8 libjpeg62-dev libevent-dev gdebi-core g++ libmagic1 phantomjs nodejs npm libldap2-dev libsasl2-dev libpq-dev From 539fc0420578ec188c8b0da8afad8b71658f2069 Mon Sep 17 00:00:00 2001 From: Jake Moshenko Date: Wed, 10 Sep 2014 17:18:49 -0400 Subject: [PATCH 10/10] Seek the file pointer to zero since we now use multipart for upload of userfiles, which does not seek automatically. --- endpoints/trigger.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/endpoints/trigger.py b/endpoints/trigger.py index ab7aa9065..ae0b4b2b7 100644 --- a/endpoints/trigger.py +++ b/endpoints/trigger.py @@ -291,6 +291,9 @@ class GithubBuildTrigger(BuildTrigger): with tarfile.open(fileobj=tarball) as archive: tarball_subdir = archive.getnames()[0] + # Seek to position 0 to make boto multipart happy + tarball.seek(0) + dockerfile_id = user_files.store_file(tarball, TARBALL_MIME) logger.debug('Successfully prepared job')
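With patches 04 through 06 applied, userfiles no longer have a dedicated storage type of their own: DelegateUserfiles reads and writes them through the shared DistributedStorage layer, and the Flask handler view is only a fallback for engines that cannot hand out a direct URL. Enabling the new RadosGW engine for userfiles therefore becomes a configuration choice. A minimal sketch, assuming DISTRIBUTED_STORAGE_CONFIG maps a location name to a [driver_name, kwargs] pair whose kwargs are passed to the driver constructor (the 'ceph_local' name, hostname, credentials, and bucket below are illustrative placeholders, not values taken from these patches):

    # Hypothetical deployment config -- every value below is a placeholder.
    DISTRIBUTED_STORAGE_CONFIG = {
        # 'RadosGWStorage' is the driver name registered in storage/__init__.py (patch 04);
        # the kwargs are assumed to be passed straight to RadosGWStorage.__init__.
        'ceph_local': ['RadosGWStorage', {
            'hostname': 'radosgw.example.internal',
            'is_secure': False,
            'storage_path': '/registry',
            'access_key': 'ACCESS_KEY',
            'secret_key': 'SECRET_KEY',
            'bucket_name': 'quay',
        }],
    }
    DISTRIBUTED_STORAGE_PREFERENCE = ['ceph_local']

    # Userfiles now point at one of the configured storage locations (patch 06).
    USERFILES_LOCATION = 'ceph_local'
    USERFILES_PATH = 'userfiles/'

Because RadosGW does not yet support CORS (see the ceph tracker TODOs in patch 04), its get_direct_download_url and get_direct_upload_url return None whenever requires_cors=True. For browser-facing callers such as build_status_view and FileDropResource, DelegateUserfiles then falls back to the userfiles_handlers view registered in Userfiles.init_app, while the build worker, which passes requires_cors=False, still receives a signed URL directly from the engine.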