diff --git a/app.py b/app.py index 3e312befb..2c96969af 100644 --- a/app.py +++ b/app.py @@ -8,6 +8,8 @@ from flask.ext.principal import Principal from flask.ext.login import LoginManager, UserMixin from flask.ext.mail import Mail from werkzeug.routing import BaseConverter +from jwkest.jwk import RSAKey +from Crypto.PublicKey import RSA import features @@ -43,13 +45,15 @@ OVERRIDE_CONFIG_PY_FILENAME = 'conf/stack/config.py' OVERRIDE_CONFIG_KEY = 'QUAY_OVERRIDE_CONFIG' +DOCKER_V2_SIGNINGKEY_FILENAME = 'docker_v2.pem' + app = Flask(__name__) logger = logging.getLogger(__name__) + class RegexConverter(BaseConverter): def __init__(self, url_map, *items): super(RegexConverter, self).__init__(url_map) - logger.debug('Installing regex converter with regex: %s', items[0]) self.regex = items[0] @@ -115,6 +119,11 @@ for handler in logging.getLogger().handlers: app.request_class = RequestWithId +# Generate a secret key if none was specified. +if app.config['SECRET_KEY'] is None: + logger.debug('Generating in-memory secret key') + app.config['SECRET_KEY'] = generate_secret_key() + features.import_features(app.config) Principal(app, use_sessions=False) @@ -155,15 +164,17 @@ secscan_notification_queue = WorkQueue(app.config['SECSCAN_NOTIFICATION_QUEUE_NA secscan_api = SecurityScannerAPI(app, config_provider) +# Check for a key in config. If none found, generate a new signing key for Docker V2 manifests. +_v2_key_path = os.path.join(OVERRIDE_CONFIG_DIRECTORY, DOCKER_V2_SIGNINGKEY_FILENAME) +if os.path.exists(_v2_key_path): + docker_v2_signing_key = RSAKey().load(_v2_key_path) +else: + docker_v2_signing_key = RSAKey(key=RSA.generate(2048)) + database.configure(app.config) model.config.app_config = app.config model.config.store = storage -# Generate a secret key if none was specified. 
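
For reference, a minimal sketch (not part of this patch) of how the docker_v2_signing_key bootstrap above could also persist a freshly generated key so the same key is reused across restarts. The load_or_create_signing_key name and the PEM write are assumptions for illustration only; the hunk above either loads an existing conf/stack/docker_v2.pem or keeps a newly generated 2048-bit key in memory.

    import os
    from Crypto.PublicKey import RSA
    from jwkest.jwk import RSAKey

    def load_or_create_signing_key(path):
        # Reuse a previously written key if one exists on disk.
        if os.path.exists(path):
            return RSAKey().load(path)

        # Otherwise generate a new 2048-bit RSA key and (assumption) write it out as PEM
        # so subsequent boots sign manifests with the same key.
        generated = RSA.generate(2048)
        with open(path, 'w') as key_file:
            key_file.write(generated.exportKey())
        return RSAKey(key=generated)
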
-if app.config['SECRET_KEY'] is None: - logger.debug('Generating in-memory secret key') - app.config['SECRET_KEY'] = generate_secret_key() - @login_manager.user_loader def load_user(user_uuid): logger.debug('User loader loading deferred user with uuid: %s' % user_uuid) diff --git a/auth/jwt_auth.py b/auth/jwt_auth.py index 9a4aa1bbe..e4d1b15a5 100644 --- a/auth/jwt_auth.py +++ b/auth/jwt_auth.py @@ -1,7 +1,7 @@ import logging import re -from datetime import datetime, timedelta +from jsonschema import validate, ValidationError from functools import wraps from flask import request from flask.ext.principal import identity_changed, Identity @@ -20,7 +20,45 @@ from util.security import strictjwt logger = logging.getLogger(__name__) -TOKEN_REGEX = re.compile(r'Bearer (([a-zA-Z0-9+/]+\.)+[a-zA-Z0-9+-_/]+)') +TOKEN_REGEX = re.compile(r'^Bearer (([a-zA-Z0-9+/]+\.)+[a-zA-Z0-9+-_/]+)$') + + +ACCESS_SCHEMA = { + 'type': 'array', + 'description': 'List of access granted to the subject', + 'items': { + 'type': 'object', + 'required': [ + 'type', + 'name', + 'actions', + ], + 'properties': { + 'type': { + 'type': 'string', + 'description': 'We only allow repository permissions', + 'enum': [ + 'repository', + ], + }, + 'name': { + 'type': 'string', + 'description': 'The name of the repository for which we are receiving access' + }, + 'actions': { + 'type': 'array', + 'description': 'List of specific verbs which can be performed against repository', + 'items': { + 'type': 'string', + 'enum': [ + 'push', + 'pull', + ], + }, + }, + }, + }, +} class InvalidJWTException(Exception): @@ -36,7 +74,7 @@ def identity_from_bearer_token(bearer_token, max_signed_s, public_key): # Extract the jwt token from the header match = TOKEN_REGEX.match(bearer_token) - if match is None or match.end() != len(bearer_token): + if match is None: raise InvalidJWTException('Invalid bearer token format') encoded = match.group(1) @@ -44,27 +82,31 @@ def identity_from_bearer_token(bearer_token, max_signed_s, public_key): # Load the JWT returned. try: - payload = strictjwt.decode(encoded, public_key, algorithms=['RS256'], audience='quay', - issuer='token-issuer') + expected_issuer = app.config['JWT_AUTH_TOKEN_ISSUER'] + audience = app.config['SERVER_HOSTNAME'] + max_exp = strictjwt.exp_max_s_option(max_signed_s) + payload = strictjwt.decode(encoded, public_key, algorithms=['RS256'], audience=audience, + issuer=expected_issuer, options=max_exp) except strictjwt.InvalidTokenError: + logger.exception('Invalid token reason') raise InvalidJWTException('Invalid token') if not 'sub' in payload: raise InvalidJWTException('Missing sub field in JWT') - # Verify that the expiration is no more than 300 seconds in the future. 
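
To make the new jsonschema-based grant validation above concrete, here is a small illustrative payload that satisfies ACCESS_SCHEMA. The namespace/repository name is hypothetical, and the snippet assumes ACCESS_SCHEMA is in scope as defined in this module.

    from jsonschema import validate, ValidationError

    example_access = [
        {
            'type': 'repository',
            'name': 'devtable/simple',   # hypothetical namespace/repo
            'actions': ['pull', 'push'],
        },
    ]

    try:
        validate(example_access, ACCESS_SCHEMA)
    except ValidationError:
        # identity_from_bearer_token treats this case as an InvalidJWTException
        # ('Token contained invalid or malformed access grants').
        pass
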
- if datetime.fromtimestamp(payload['exp']) > datetime.utcnow() + timedelta(seconds=max_signed_s): - raise InvalidJWTException('Token was signed for more than %s seconds' % max_signed_s) - username = payload['sub'] loaded_identity = Identity(username, 'signed_jwt') # Process the grants from the payload - if 'access' in payload: - for grant in payload['access']: - if grant['type'] != 'repository': - continue + if 'access' in payload: + try: + validate(payload['access'], ACCESS_SCHEMA) + except ValidationError: + logger.exception('We should not be minting invalid credentials') + raise InvalidJWTException('Token contained invalid or malformed access grants') + + for grant in payload['access']: namespace, repo_name = parse_namespace_repository(grant['name']) if 'push' in grant['actions']: @@ -88,7 +130,7 @@ def process_jwt_auth(func): logger.debug('Called with params: %s, %s', args, kwargs) auth = request.headers.get('authorization', '').strip() if auth: - max_signature_seconds = app.config.get('JWT_AUTH_MAX_FRESH_S', 300) + max_signature_seconds = app.config.get('JWT_AUTH_MAX_FRESH_S', 3660) certificate_file_path = app.config['JWT_AUTH_CERTIFICATE_PATH'] public_key = load_public_key(certificate_file_path) diff --git a/conf/gunicorn_local.py b/conf/gunicorn_local.py index 49a30682d..f95d85cc1 100644 --- a/conf/gunicorn_local.py +++ b/conf/gunicorn_local.py @@ -1,3 +1,5 @@ +from Crypto import Random + bind = '0.0.0.0:5000' workers = 2 worker_class = 'gevent' @@ -5,3 +7,8 @@ daemon = False logconfig = 'conf/logging_debug.conf' pythonpath = '.' preload_app = True + +def post_fork(server, worker): + # Reset the Random library to ensure it won't raise the "PID check failed." error after + # gunicorn forks. + Random.atfork() diff --git a/conf/gunicorn_registry.py b/conf/gunicorn_registry.py index 944608868..9d7f080c1 100644 --- a/conf/gunicorn_registry.py +++ b/conf/gunicorn_registry.py @@ -1,6 +1,13 @@ +from Crypto import Random + bind = 'unix:/tmp/gunicorn_registry.sock' workers = 8 worker_class = 'gevent' logconfig = 'conf/logging.conf' pythonpath = '.' preload_app = True + +def post_fork(server, worker): + # Reset the Random library to ensure it won't raise the "PID check failed." error after + # gunicorn forks. + Random.atfork() diff --git a/conf/server-base.conf b/conf/server-base.conf index 5aa526f14..22889a5e9 100644 --- a/conf/server-base.conf +++ b/conf/server-base.conf @@ -34,10 +34,10 @@ location /realtime { proxy_request_buffering off; } -# At the begining and end of a push/pull, /v1/repositories is hit by the Docker +# At the begining and end of a push/pull, (/v1/repositories|/v2/auth/) is hit by the Docker # client. By rate-limiting just this endpoint, we can avoid accidentally # blocking pulls/pushes for images with many layers. 
-location /v1/repositories/ { +location ~ ^/(v1/repositories|v2/auth)/ { proxy_buffering off; proxy_request_buffering off; @@ -49,13 +49,14 @@ location /v1/repositories/ { limit_req zone=repositories burst=10; } -location /v1/ { +location ~ ^/(v1|v2)/ { proxy_buffering off; proxy_request_buffering off; proxy_http_version 1.1; proxy_set_header Connection ""; + proxy_set_header Host $host; proxy_pass http://registry_app_server; proxy_temp_path /tmp 1 2; diff --git a/config.py b/config.py index cd3d00726..6fa139e7d 100644 --- a/config.py +++ b/config.py @@ -42,7 +42,6 @@ class DefaultConfig(object): LOGGING_LEVEL = 'DEBUG' SEND_FILE_MAX_AGE_DEFAULT = 0 - POPULATE_DB_TEST_DATA = True PREFERRED_URL_SCHEME = 'http' SERVER_HOSTNAME = 'localhost:5000' @@ -191,6 +190,9 @@ class DefaultConfig(object): # Feature Flag: Whether users can directly login to the UI. FEATURE_DIRECT_LOGIN = True + # Feature Flag: Whether the v2/ endpoint is visible + FEATURE_ADVERTISE_V2 = True + BUILD_MANAGER = ('enterprise', {}) DISTRIBUTED_STORAGE_CONFIG = { @@ -230,7 +232,8 @@ class DefaultConfig(object): SIGNED_GRANT_EXPIRATION_SEC = 60 * 60 * 24 # One day to complete a push/pull # Registry v2 JWT Auth config - JWT_AUTH_MAX_FRESH_S = 60 * 5 # At most the JWT can be signed for 300s in the future + JWT_AUTH_MAX_FRESH_S = 60 * 60 + 60 # At most signed for one hour, accounting for clock skew + JWT_AUTH_TOKEN_ISSUER = 'quay-test-issuer' JWT_AUTH_CERTIFICATE_PATH = 'conf/selfsigned/jwt.crt' JWT_AUTH_PRIVATE_KEY_PATH = 'conf/selfsigned/jwt.key.insecure' diff --git a/data/model/__init__.py b/data/model/__init__.py index a8326ff96..0c8122c31 100644 --- a/data/model/__init__.py +++ b/data/model/__init__.py @@ -9,6 +9,10 @@ class BlobDoesNotExist(DataModelException): pass +class InvalidBlobUpload(DataModelException): + pass + + class InvalidEmailAddressException(DataModelException): pass @@ -65,6 +69,10 @@ class InvalidTeamMemberException(DataModelException): pass +class InvalidManifestException(DataModelException): + pass + + class TooManyLoginAttemptsException(Exception): def __init__(self, message, retry_after): super(TooManyLoginAttemptsException, self).__init__(message) diff --git a/data/model/blob.py b/data/model/blob.py index 5547c7646..5b8d38420 100644 --- a/data/model/blob.py +++ b/data/model/blob.py @@ -1,8 +1,8 @@ from uuid import uuid4 -from data.model import tag, _basequery, BlobDoesNotExist, db_transaction +from data.model import tag, _basequery, BlobDoesNotExist, InvalidBlobUpload, db_transaction from data.database import (Repository, Namespace, ImageStorage, Image, ImageStorageLocation, - ImageStoragePlacement) + ImageStoragePlacement, BlobUpload) def get_repo_blob_by_digest(namespace, repo_name, blob_digest): @@ -15,35 +15,63 @@ def get_repo_blob_by_digest(namespace, repo_name, blob_digest): .join(ImageStorage) .join(Image) .join(Repository) - .join(Namespace) + .join(Namespace, on=(Namespace.id == Repository.namespace_user)) .where(Repository.name == repo_name, Namespace.username == namespace, - ImageStorage.content_checksum == blob_digest)) + ImageStorage.content_checksum == blob_digest, + ImageStorage.uploading == False)) if not placements: raise BlobDoesNotExist('Blob does not exist with digest: {0}'.format(blob_digest)) found = placements[0].storage - found.locations = {placement.location.name for placement in placements} + found.locations = {placement.location.name for placement in placements + if placement.storage.id == found.id} return found -def store_blob_record_and_temp_link(namespace, repo_name, 
blob_digest, location_name, + +def store_blob_record_and_temp_link(namespace, repo_name, blob_digest, location_obj, byte_count, link_expiration_s): """ Store a record of the blob and temporarily link it to the specified repository. """ random_image_name = str(uuid4()) - with db_transaction: + with db_transaction(): repo = _basequery.get_existing_repository(namespace, repo_name) - try: storage = ImageStorage.get(content_checksum=blob_digest) - location = ImageStorageLocation.get(name=location_name) - ImageStoragePlacement.get(storage=storage, location=location) + storage.image_size = byte_count + storage.save() + + ImageStoragePlacement.get(storage=storage, location=location_obj) except ImageStorage.DoesNotExist: - storage = ImageStorage.create(content_checksum=blob_digest) + storage = ImageStorage.create(content_checksum=blob_digest, uploading=False, + image_size=byte_count) + ImageStoragePlacement.create(storage=storage, location=location_obj) except ImageStoragePlacement.DoesNotExist: - ImageStoragePlacement.create(storage=storage, location=location) + ImageStoragePlacement.create(storage=storage, location=location_obj) # Create a temporary link into the repository, to be replaced by the v1 metadata later # and create a temporary tag to reference it image = Image.create(storage=storage, docker_image_id=random_image_name, repository=repo) tag.create_temporary_hidden_tag(repo, image, link_expiration_s) + + +def get_blob_upload(namespace, repo_name, upload_uuid): + """ Load the upload which is already in progress. + """ + try: + return (BlobUpload + .select() + .join(Repository) + .join(Namespace, on=(Namespace.id == Repository.namespace_user)) + .where(Repository.name == repo_name, Namespace.username == namespace, + BlobUpload.uuid == upload_uuid) + .get()) + except BlobUpload.DoesNotExist: + raise InvalidBlobUpload() + + +def initiate_upload(namespace, repo_name, uuid, location_name, storage_metadata): + repo = _basequery.get_existing_repository(namespace, repo_name) + location = ImageStorageLocation.get(name=location_name) + return BlobUpload.create(repository=repo, location=location, uuid=uuid, + storage_metadata=storage_metadata) diff --git a/data/model/image.py b/data/model/image.py index fceaa89f9..87ae77fc5 100644 --- a/data/model/image.py +++ b/data/model/image.py @@ -4,7 +4,8 @@ import dateutil.parser from peewee import JOIN_LEFT_OUTER, fn, SQL from datetime import datetime -from data.model import DataModelException, db_transaction, _basequery, storage +from data.model import (DataModelException, db_transaction, _basequery, storage, + InvalidImageException, config) from data.database import (Image, Repository, ImageStoragePlacement, Namespace, ImageStorage, ImageStorageLocation, RepositoryPermission, db_for_update) @@ -32,7 +33,9 @@ def get_repository_image_and_deriving(docker_image_id, storage_uuid): def get_parent_images(namespace_name, repository_name, image_obj): - """ Returns a list of parent Image objects in chronilogical order. """ + """ Returns a list of parent Image objects starting with the most recent parent + and ending with the base layer. 
+ """ parents = image_obj.ancestors # Ancestors are in the format ///...//, with each path section @@ -49,7 +52,7 @@ def get_parent_images(namespace_name, repository_name, image_obj): id_to_image = {unicode(image.id): image for image in parents} - return [id_to_image[parent_id] for parent_id in parent_db_ids] + return [id_to_image[parent_id] for parent_id in reversed(parent_db_ids)] def get_repo_image(namespace_name, repository_name, docker_image_id): @@ -97,7 +100,6 @@ def get_repository_images_base(namespace_name, repository_name, query_modifier): .where(Repository.name == repository_name, Namespace.username == namespace_name)) query = query_modifier(query) - return invert_placement_query_results(query) @@ -127,18 +129,15 @@ def invert_placement_query_results(placement_query): return images.values() -def lookup_repository_images(namespace_name, repository_name, docker_image_ids): +def lookup_repository_images(repo, docker_image_ids): return (Image .select() - .join(Repository) - .join(Namespace, on=(Repository.namespace_user == Namespace.id)) - .where(Repository.name == repository_name, Namespace.username == namespace_name, - Image.docker_image_id << docker_image_ids)) + .where(Image.repository == repo, Image.docker_image_id << docker_image_ids)) def get_matching_repository_images(namespace_name, repository_name, docker_image_ids): def modify_query(query): - return query.where(Image.docker_image_id << docker_image_ids) + return query.where(Image.docker_image_id << list(docker_image_ids)) return get_repository_images_base(namespace_name, repository_name, modify_query) @@ -164,8 +163,8 @@ def get_repository_images(namespace_name, repository_name): def get_image_by_id(namespace_name, repository_name, docker_image_id): image = get_repo_image_extended(namespace_name, repository_name, docker_image_id) if not image: - raise DataModelException('Unable to find image \'%s\' for repo \'%s/%s\'' % - (docker_image_id, namespace_name, repository_name)) + raise InvalidImageException('Unable to find image \'%s\' for repo \'%s/%s\'' % + (docker_image_id, namespace_name, repository_name)) return image @@ -286,7 +285,7 @@ def find_create_or_link_image(docker_image_id, repo_obj, username, translations, return repo_image logger.debug('Creating new storage for docker id: %s', docker_image_id) - new_storage = storage.create_storage(preferred_location) + new_storage = storage.create_v1_storage(preferred_location) return Image.create(docker_image_id=docker_image_id, repository=repo_obj, storage=new_storage, @@ -331,7 +330,6 @@ def set_image_metadata(docker_image_id, namespace_name, repository_name, created fetched.parent_id = parent.id fetched.save() - fetched.storage.save() return fetched @@ -379,13 +377,76 @@ def set_image_size(docker_image_id, namespace_name, repository_name, image_size, return image -def get_image(repo, dockerfile_id): +def get_image(repo, docker_image_id): try: - return Image.get(Image.docker_image_id == dockerfile_id, Image.repository == repo) + return Image.get(Image.docker_image_id == docker_image_id, Image.repository == repo) except Image.DoesNotExist: return None +def get_repo_image_by_storage_checksum(namespace, repository_name, storage_checksum): + try: + return (Image + .select() + .join(ImageStorage) + .switch(Image) + .join(Repository) + .join(Namespace, on=(Namespace.id == Repository.namespace_user)) + .where(Repository.name == repository_name, Namespace.username == namespace, + ImageStorage.content_checksum == storage_checksum, + ImageStorage.uploading == False) + .get()) + 
except Image.DoesNotExist: + msg = 'Image with storage checksum {0} does not exist in repo {1}/{2}'.format(storage_checksum, + namespace, + repository_name) + raise InvalidImageException(msg) + + +def get_image_layers(image): + """ Returns a list of the full layers of an image, including itself (if specified), sorted + from base image outward. """ + ancestors = image.ancestors.split('/')[1:-1] + image_ids = [ancestor_id for ancestor_id in ancestors if ancestor_id] + image_ids.append(str(image.id)) + + query = (ImageStoragePlacement + .select(ImageStoragePlacement, Image, ImageStorage, ImageStorageLocation) + .join(ImageStorageLocation) + .switch(ImageStoragePlacement) + .join(ImageStorage, JOIN_LEFT_OUTER) + .join(Image) + .where(Image.id << image_ids)) + + image_list = list(_translate_placements_to_images_with_locations(query)) + image_list.sort(key=lambda image: image_ids.index(str(image.id))) + return image_list + + +def synthesize_v1_image(repo, image_storage, docker_image_id, created_date_str, + comment, command, v1_json_metadata, parent_image=None): + """ Find an existing image with this docker image id, and if none exists, write one with the + specified metadata. + """ + ancestors = '/' + parent_id = None + if parent_image is not None: + ancestors = '{0}{1}/'.format(parent_image.ancestors, parent_image.id) + parent_id = parent_image.id + + created = None + if created_date_str is not None: + try: + created = dateutil.parser.parse(created_date_str).replace(tzinfo=None) + except: + # parse raises different exceptions, so we cannot use a specific kind of handler here. + pass + + return Image.create(docker_image_id=docker_image_id, ancestors=ancestors, comment=comment, + command=command, v1_json_metadata=v1_json_metadata, created=created, + storage=image_storage, repository=repo, parent_id=parent_id) + + def ensure_image_locations(*names): with db_transaction(): locations = ImageStorageLocation.select().where(ImageStorageLocation.name << names) diff --git a/data/model/repository.py b/data/model/repository.py index 8379a1f6d..c2e1b74d6 100644 --- a/data/model/repository.py +++ b/data/model/repository.py @@ -7,8 +7,8 @@ from data.model import (DataModelException, tag, db_transaction, storage, image, _basequery, config) from data.database import (Repository, Namespace, RepositoryTag, Star, Image, ImageStorage, User, Visibility, RepositoryPermission, TupleSelector, RepositoryActionCount, - Role, RepositoryAuthorizedEmail, db_for_update, get_epoch_timestamp, - db_random_func) + Role, RepositoryAuthorizedEmail, TagManifest, db_for_update, + get_epoch_timestamp, db_random_func) logger = logging.getLogger(__name__) @@ -47,6 +47,12 @@ def _purge_all_repository_tags(namespace_name, repository_name): except Repository.DoesNotExist: raise DataModelException('Invalid repository \'%s/%s\'' % (namespace_name, repository_name)) + + # Delete all manifests. + repo_tags = RepositoryTag.select().where(RepositoryTag.repository == repo.id) + TagManifest.delete().where(TagManifest.tag << repo_tags).execute() + + # Delete all tags. 
RepositoryTag.delete().where(RepositoryTag.repository == repo.id).execute() diff --git a/data/model/storage.py b/data/model/storage.py index 0ddbc8ac8..77ab9148c 100644 --- a/data/model/storage.py +++ b/data/model/storage.py @@ -5,7 +5,7 @@ from peewee import JOIN_LEFT_OUTER, fn from data.model import config, db_transaction, InvalidImageException from data.database import (ImageStorage, Image, DerivedImageStorage, ImageStoragePlacement, ImageStorageLocation, ImageStorageTransformation, ImageStorageSignature, - ImageStorageSignatureKind) + ImageStorageSignatureKind, Repository, Namespace) logger = logging.getLogger(__name__) @@ -24,7 +24,7 @@ def find_or_create_derived_storage(source, transformation_name, preferred_locati logger.debug('Creating storage dervied from source: %s', source.uuid) trans = ImageStorageTransformation.get(name=transformation_name) - new_storage = create_storage(preferred_location) + new_storage = create_v1_storage(preferred_location) DerivedImageStorage.create(source=source, derivative=new_storage, transformation=trans) return new_storage @@ -34,7 +34,7 @@ def garbage_collect_storage(storage_id_whitelist): return def placements_query_to_paths_set(placements_query): - return {(placement.location.name, config.store.image_path(placement.storage.uuid)) + return {(placement.location.name, get_layer_path(placement.storage)) for placement in placements_query} def orphaned_storage_query(select_base_query, candidates, group_by): @@ -123,7 +123,7 @@ def garbage_collect_storage(storage_id_whitelist): config.store.remove({location_name}, image_path) -def create_storage(location_name): +def create_v1_storage(location_name): storage = ImageStorage.create(cas_path=False) location = ImageStorageLocation.get(name=location_name) ImageStoragePlacement.create(location=location, storage=storage) @@ -144,10 +144,9 @@ def lookup_storage_signature(storage, signature_kind): kind = ImageStorageSignatureKind.get(name=signature_kind) try: return (ImageStorageSignature - .select() - .where(ImageStorageSignature.storage == storage, - ImageStorageSignature.kind == kind) - .get()) + .select() + .where(ImageStorageSignature.storage == storage, ImageStorageSignature.kind == kind) + .get()) except ImageStorageSignature.DoesNotExist: return None @@ -155,12 +154,12 @@ def lookup_storage_signature(storage, signature_kind): def find_derived_storage(source, transformation_name): try: found = (ImageStorage - .select(ImageStorage, DerivedImageStorage) - .join(DerivedImageStorage, on=(ImageStorage.id == DerivedImageStorage.derivative)) - .join(ImageStorageTransformation) - .where(DerivedImageStorage.source == source, - ImageStorageTransformation.name == transformation_name) - .get()) + .select(ImageStorage, DerivedImageStorage) + .join(DerivedImageStorage, on=(ImageStorage.id == DerivedImageStorage.derivative)) + .join(ImageStorageTransformation) + .where(DerivedImageStorage.source == source, + ImageStorageTransformation.name == transformation_name) + .get()) found.locations = {placement.location.name for placement in found.imagestorageplacement_set} return found @@ -182,16 +181,17 @@ def delete_derived_storage_by_uuid(storage_uuid): image_storage.delete_instance(recursive=True) -def get_storage_by_uuid(storage_uuid): - placements = list(ImageStoragePlacement - .select(ImageStoragePlacement, ImageStorage, ImageStorageLocation) - .join(ImageStorageLocation) - .switch(ImageStoragePlacement) - .join(ImageStorage) - .where(ImageStorage.uuid == storage_uuid)) +def _get_storage(query_modifier): + query = 
(ImageStoragePlacement + .select(ImageStoragePlacement, ImageStorage, ImageStorageLocation) + .join(ImageStorageLocation) + .switch(ImageStoragePlacement) + .join(ImageStorage)) + + placements = list(query_modifier(query)) if not placements: - raise InvalidImageException('No storage found with uuid: %s', storage_uuid) + raise InvalidImageException() found = placements[0].storage found.locations = {placement.location.name for placement in placements} @@ -199,3 +199,29 @@ def get_storage_by_uuid(storage_uuid): return found +def get_storage_by_uuid(storage_uuid): + def filter_to_uuid(query): + return query.where(ImageStorage.uuid == storage_uuid) + + try: + return _get_storage(filter_to_uuid) + except InvalidImageException: + raise InvalidImageException('No storage found with uuid: %s', storage_uuid) + + +def get_layer_path(storage_record): + """ Returns the path in the storage engine to the layer data referenced by the storage row. """ + store = config.store + if not storage_record.cas_path: + logger.debug('Serving layer from legacy v1 path') + return store.v1_image_layer_path(storage_record.uuid) + + return store.blob_path(storage_record.content_checksum) + +def lookup_repo_storages_by_content_checksum(repo, checksums): + """ Looks up repository storages (without placements) matching the given repository + and checksum. """ + return (ImageStorage + .select() + .join(Image) + .where(Image.repository == repo, ImageStorage.content_checksum << checksums)) diff --git a/data/model/tag.py b/data/model/tag.py index 002be14b2..1a7932347 100644 --- a/data/model/tag.py +++ b/data/model/tag.py @@ -1,7 +1,8 @@ from uuid import uuid4 -from data.model import image, db_transaction, DataModelException, _basequery -from data.database import (RepositoryTag, Repository, Image, ImageStorage, Namespace, +from data.model import (image, db_transaction, DataModelException, _basequery, + InvalidManifestException) +from data.database import (RepositoryTag, Repository, Image, ImageStorage, Namespace, TagManifest, get_epoch_timestamp, db_for_update) @@ -50,8 +51,8 @@ def list_repository_tags(namespace_name, repository_name, include_hidden=False, return query -def create_or_update_tag(namespace_name, repository_name, tag_name, - tag_docker_image_id, reversion=False): +def create_or_update_tag(namespace_name, repository_name, tag_name, tag_docker_image_id, + reversion=False): try: repo = _basequery.get_existing_repository(namespace_name, repository_name) except Repository.DoesNotExist: @@ -174,3 +175,62 @@ def revert_tag(repo_obj, tag_name, docker_image_id): return create_or_update_tag(repo_obj.namespace_user.username, repo_obj.name, tag_name, docker_image_id, reversion=True) + +def store_tag_manifest(namespace, repo_name, tag_name, docker_image_id, manifest_digest, + manifest_data): + with db_transaction(): + tag = create_or_update_tag(namespace, repo_name, tag_name, docker_image_id) + + try: + manifest = TagManifest.get(digest=manifest_digest) + manifest.tag = tag + manifest.save() + except TagManifest.DoesNotExist: + return TagManifest.create(tag=tag, digest=manifest_digest, json_data=manifest_data) + + +def _get_active_tag(namespace, repo_name, tag_name): + return _tag_alive(RepositoryTag + .select() + .join(Image) + .join(Repository) + .join(Namespace, on=(Repository.namespace_user == Namespace.id)) + .where(RepositoryTag.name == tag_name, Repository.name == repo_name, + Namespace.username == namespace)).get() + + +def associate_generated_tag_manifest(namespace, repo_name, tag_name, manifest_digest, + 
manifest_data): + tag = _get_active_tag(namespace, repo_name, tag_name) + return TagManifest.create(tag=tag, digest=manifest_digest, json_data=manifest_data) + + +def load_tag_manifest(namespace, repo_name, tag_name): + try: + return (_load_repo_manifests(namespace, repo_name) + .where(RepositoryTag.name == tag_name) + .get()) + except TagManifest.DoesNotExist: + msg = 'Manifest not found for tag {0} in repo {1}/{2}'.format(tag_name, namespace, repo_name) + raise InvalidManifestException(msg) + + +def load_manifest_by_digest(namespace, repo_name, digest): + try: + return (_load_repo_manifests(namespace, repo_name) + .where(TagManifest.digest == digest) + .get()) + except TagManifest.DoesNotExist: + msg = 'Manifest not found with digest {0} in repo {1}/{2}'.format(digest, namespace, repo_name) + raise InvalidManifestException(msg) + + +def _load_repo_manifests(namespace, repo_name): + return (TagManifest + .select(TagManifest, RepositoryTag) + .join(RepositoryTag) + .join(Image) + .join(Repository) + .join(Namespace, on=(Namespace.id == Repository.namespace_user)) + .where(Repository.name == repo_name, Namespace.username == namespace)) + diff --git a/data/users/externaljwt.py b/data/users/externaljwt.py index ac29f22a1..55008aa9d 100644 --- a/data/users/externaljwt.py +++ b/data/users/externaljwt.py @@ -2,7 +2,6 @@ import logging import json import os -from datetime import datetime, timedelta from data.users.federated import FederatedUsers, VerifiedCredentials from util.security import strictjwt @@ -46,9 +45,11 @@ class ExternalJWTAuthN(FederatedUsers): # Load the JWT returned. encoded = result_data.get('token', '') + exp_limit_options = strictjwt.exp_max_s_option(self.max_fresh_s) try: payload = strictjwt.decode(encoded, self.public_key, algorithms=['RS256'], - audience='quay.io/jwtauthn', issuer=self.issuer) + audience='quay.io/jwtauthn', issuer=self.issuer, + options=exp_limit_options) except strictjwt.InvalidTokenError: logger.exception('Exception when decoding returned JWT') return (None, 'Invalid username or password') @@ -59,16 +60,6 @@ class ExternalJWTAuthN(FederatedUsers): if not 'email' in payload: raise Exception('Missing email field in JWT') - if not 'exp' in payload: - raise Exception('Missing exp field in JWT') - - # Verify that the expiration is no more than self.max_fresh_s seconds in the future. - expiration = datetime.utcfromtimestamp(payload['exp']) - if expiration > datetime.utcnow() + timedelta(seconds=self.max_fresh_s): - logger.debug('Payload expiration is outside of the %s second window: %s', self.max_fresh_s, - payload['exp']) - return (None, 'Invalid username or password') - # Parse out the username and email. return (VerifiedCredentials(username=payload['sub'], email=payload['email']), None) diff --git a/digest/digest_tools.py b/digest/digest_tools.py index efebac831..212088236 100644 --- a/digest/digest_tools.py +++ b/digest/digest_tools.py @@ -2,43 +2,57 @@ import re import os.path import hashlib -from collections import namedtuple - - -Digest = namedtuple('Digest', ['is_tarsum', 'tarsum_version', 'hash_alg', 'hash_bytes']) - - -DIGEST_PATTERN = r'(tarsum\.(v[\w]+)\+)?([\w]+):([0-9a-f]+)' -DIGEST_REGEX = re.compile(DIGEST_PATTERN) +DIGEST_PATTERN = r'([A-Za-z0-9_+.-]+):([A-Fa-f0-9]+)' +REPLACE_WITH_PATH = re.compile(r'[+.]') +REPLACE_DOUBLE_SLASHES = re.compile(r'/+') class InvalidDigestException(RuntimeError): pass -def parse_digest(digest): - """ Returns the digest parsed out to its components. 
""" - match = DIGEST_REGEX.match(digest) - if match is None or match.end() != len(digest): - raise InvalidDigestException('Not a valid digest: %s', digest) +class Digest(object): + DIGEST_REGEX = re.compile(DIGEST_PATTERN) - is_tarsum = match.group(1) is not None - return Digest(is_tarsum, match.group(2), match.group(3), match.group(4)) + def __init__(self, hash_alg, hash_bytes): + self._hash_alg = hash_alg + self._hash_bytes = hash_bytes + + def __str__(self): + return '{0}:{1}'.format(self._hash_alg, self._hash_bytes) + + def __eq__(self, rhs): + return isinstance(rhs, Digest) and str(self) == str(rhs) + + @staticmethod + def parse_digest(digest): + """ Returns the digest parsed out to its components. """ + match = Digest.DIGEST_REGEX.match(digest) + if match is None or match.end() != len(digest): + raise InvalidDigestException('Not a valid digest: %s', digest) + + return Digest(match.group(1), match.group(2)) + + @property + def hash_alg(self): + return self._hash_alg + + @property + def hash_bytes(self): + return self._hash_bytes def content_path(digest): """ Returns a relative path to the parsed digest. """ - parsed = parse_digest(digest) + parsed = Digest.parse_digest(digest) components = [] - if parsed.is_tarsum: - components.extend(['tarsum', parsed.tarsum_version]) - # Generate a prefix which is always two characters, and which will be filled with leading zeros # if the input does not contain at least two characters. e.g. ABC -> AB, A -> 0A prefix = parsed.hash_bytes[0:2].zfill(2) - components.extend([parsed.hash_alg, prefix, parsed.hash_bytes]) - + pathish = REPLACE_WITH_PATH.sub('/', parsed.hash_alg) + normalized = REPLACE_DOUBLE_SLASHES.sub('/', pathish).lstrip('/') + components.extend([normalized, prefix, parsed.hash_bytes]) return os.path.join(*components) @@ -58,7 +72,11 @@ def sha256_digest_from_generator(content_generator): return 'sha256:{0}'.format(digest.hexdigest()) +def sha256_digest_from_hashlib(sha256_hash_obj): + return 'sha256:{0}'.format(sha256_hash_obj.hexdigest()) + + def digests_equal(lhs_digest_string, rhs_digest_string): """ Parse and compare the two digests, returns True if the digests are equal, False otherwise. """ - return parse_digest(lhs_digest_string) == parse_digest(rhs_digest_string) + return Digest.parse_digest(lhs_digest_string) == Digest.parse_digest(rhs_digest_string) diff --git a/endpoints/api/tag.py b/endpoints/api/tag.py index 4233144ef..416d3e3f3 100644 --- a/endpoints/api/tag.py +++ b/endpoints/api/tag.py @@ -154,10 +154,7 @@ class RepositoryTagImages(RepositoryParamResource): image_map[str(image.id)] = image image_map_all = dict(image_map) - - parents = list(parent_images) - parents.reverse() - all_images = [tag_image] + parents + all_images = [tag_image] + list(parent_images) # Filter the images returned to those not found in the ancestry of any of the other tags in # the repository. 
diff --git a/endpoints/v1/registry.py b/endpoints/v1/registry.py index de4dc9192..d62cada5b 100644 --- a/endpoints/v1/registry.py +++ b/endpoints/v1/registry.py @@ -13,6 +13,7 @@ from auth.auth_context import get_authenticated_user, get_grant_user_context from digest import checksums from util.registry import changes from util.http import abort, exact_abort +from util.registry.filelike import SocketReader from auth.permissions import (ReadRepositoryPermission, ModifyRepositoryPermission) from data import model, database @@ -24,26 +25,6 @@ from endpoints.decorators import anon_protect logger = logging.getLogger(__name__) -class SocketReader(object): - def __init__(self, fp): - self._fp = fp - self.handlers = [] - - def add_handler(self, handler): - self.handlers.append(handler) - - def read(self, n=-1): - buf = self._fp.read(n) - if not buf: - return '' - for handler in self.handlers: - handler(buf) - return buf - - def tell(self): - raise IOError('Stream is not seekable.') - - def image_is_uploading(repo_image): if repo_image is None: return False @@ -159,11 +140,9 @@ def get_image_layer(namespace, repository, image_id, headers): abort(404, 'Image %(image_id)s not found', issue='unknown-image', image_id=image_id) - logger.debug('Looking up the layer path') try: - path = store.image_layer_path(repo_image.storage.uuid) - - logger.debug('Looking up the direct download URL') + path = model.storage.get_layer_path(repo_image.storage) + logger.debug('Looking up the direct download URL for path: %s', path) direct_download_url = store.get_direct_download_url(repo_image.storage.locations, path) if direct_download_url: @@ -205,8 +184,9 @@ def put_image_layer(namespace, repository, image_id): logger.exception('Exception when retrieving image data') abort(404, 'Image %(image_id)s not found', issue='unknown-image', image_id=image_id) - logger.debug('Retrieving image path info') - layer_path = store.image_layer_path(uuid) + uuid = repo_image.storage.uuid + layer_path = store.v1_image_layer_path(uuid) + logger.info('Storing layer at v1 path: %s', layer_path) if (store.exists(repo_image.storage.locations, layer_path) and not image_is_uploading(repo_image)): @@ -393,37 +373,22 @@ def get_image_ancestry(namespace, repository, image_id, headers): if not permission.can() and not model.repository.repository_is_public(namespace, repository): abort(403) - logger.debug('Looking up repo image') - repo_image = model.image.get_repo_image_extended(namespace, repository, image_id) - - logger.debug('Looking up image data') try: - uuid = repo_image.storage.uuid - data = store.get_content(repo_image.storage.locations, store.image_ancestry_path(uuid)) - except (IOError, AttributeError): - abort(404, 'Image %(image_id)s not found', issue='unknown-image', - image_id=image_id) + image = model.image.get_image_by_id(namespace, repository, image_id) + except model.InvalidImageException: + abort(404, 'Image %(image_id)s not found', issue='unknown-image', image_id=image_id) - logger.debug('Converting to <-> from JSON') - response = make_response(json.dumps(json.loads(data)), 200) + parents = model.image.get_parent_images(namespace, repository, image) + + ancestry_docker_ids = [image.docker_image_id] + ancestry_docker_ids.extend([parent.docker_image_id for parent in parents]) + + # We can not use jsonify here because we are returning a list not an object + response = make_response(json.dumps(ancestry_docker_ids), 200) response.headers.extend(headers) - - logger.debug('Done') return response -def generate_ancestry(image_id, uuid, 
locations, parent_id=None, parent_uuid=None, - parent_locations=None): - if not parent_id: - store.put_content(locations, store.image_ancestry_path(uuid), json.dumps([image_id])) - return - - data = store.get_content(parent_locations, store.image_ancestry_path(parent_uuid)) - data = json.loads(data) - data.insert(0, image_id) - store.put_content(locations, store.image_ancestry_path(uuid), json.dumps(data)) - - def store_checksum(image_with_storage, checksum, content_checksum): checksum_parts = checksum.split(':') if len(checksum_parts) != 2: @@ -486,22 +451,16 @@ def put_image_json(namespace, repository, image_id): abort(400, 'JSON data contains invalid id for image: %(image_id)s', issue='invalid-request', image_id=image_id) - parent_id = data.get('parent') + parent_id = data.get('parent', None) parent_image = None if parent_id: logger.debug('Looking up parent image') parent_image = model.image.get_repo_image_extended(namespace, repository, parent_id) - parent_uuid = parent_image and parent_image.storage.uuid - parent_locations = parent_image and parent_image.storage.locations - - if parent_id: - logger.debug('Looking up parent image data') - - if parent_id and not parent_image.v1_json_metadata: - abort(400, 'Image %(image_id)s depends on non existing parent image %(parent_id)s', - issue='invalid-request', image_id=image_id, parent_id=parent_id) + if not parent_image or parent_image.storage.uploading: + abort(400, 'Image %(image_id)s depends on non existing parent image %(parent_id)s', + issue='invalid-request', image_id=image_id, parent_id=parent_id) logger.debug('Checking if image already exists') if repo_image.v1_json_metadata and not image_is_uploading(repo_image): @@ -510,8 +469,7 @@ def put_image_json(namespace, repository, image_id): set_uploading_flag(repo_image, True) # If we reach that point, it means that this is a new image or a retry - # on a failed push - # save the metadata + # on a failed push, save the metadata command_list = data.get('container_config', {}).get('Cmd', None) command = json.dumps(command_list) if command_list else None @@ -519,16 +477,6 @@ def put_image_json(namespace, repository, image_id): model.image.set_image_metadata(image_id, namespace, repository, data.get('created'), data.get('comment'), command, v1_metadata, parent_image) - logger.debug('Generating image ancestry') - - try: - generate_ancestry(image_id, repo_image.storage.uuid, repo_image.storage.locations, parent_id, - parent_uuid, parent_locations) - except IOError as ioe: - logger.debug('Error when generating ancestry: %s', ioe.message) - abort(404) - - logger.debug('Done') return make_response('true', 200) @@ -556,7 +504,7 @@ def process_image_changes(namespace, repository, image_id): parent_trie_path = None if parents: parent_trie_path, parent_locations = process_image_changes(namespace, repository, - parents[-1].docker_image_id) + parents[0].docker_image_id) # Read in the collapsed layer state of the filesystem for the parent parent_trie = changes.empty_fs() @@ -565,7 +513,7 @@ def process_image_changes(namespace, repository, image_id): parent_trie.frombytes(parent_trie_bytes) # Read in the file entries from the layer tar file - layer_path = store.image_layer_path(uuid) + layer_path = model.storage.get_layer_path(repo_image.storage) with store.stream_read_file(image.storage.locations, layer_path) as layer_tar_stream: removed_files = set() layer_files = changes.files_and_dirs_from_tar(layer_tar_stream, diff --git a/endpoints/v1/tag.py b/endpoints/v1/tag.py index 094c905df..865644e2e 100644 --- 
a/endpoints/v1/tag.py +++ b/endpoints/v1/tag.py @@ -40,7 +40,11 @@ def get_tag(namespace, repository, tag): permission = ReadRepositoryPermission(namespace, repository) if permission.can() or model.repository.repository_is_public(namespace, repository): - tag_image = model.tag.get_tag_image(namespace, repository, tag) + try: + tag_image = model.tag.get_tag_image(namespace, repository, tag) + except model.DataModelException: + abort(404) + resp = make_response('"%s"' % tag_image.docker_image_id) resp.headers['Content-Type'] = 'application/json' return resp diff --git a/endpoints/v2/__init__.py b/endpoints/v2/__init__.py index c1c7c22c0..846a304e7 100644 --- a/endpoints/v2/__init__.py +++ b/endpoints/v2/__init__.py @@ -1,14 +1,14 @@ -# XXX This code is not yet ready to be run in production, and should remain disabled until such -# XXX time as this notice is removed. - import logging -from flask import Blueprint, make_response, url_for, request +from flask import Blueprint, make_response, url_for, request, jsonify from functools import wraps from urlparse import urlparse +import features + from app import metric_queue from endpoints.decorators import anon_protect, anon_allowed +from endpoints.v2.errors import V2RegistryException from auth.jwt_auth import process_jwt_auth from auth.auth_context import get_grant_user_context from auth.permissions import (ReadRepositoryPermission, ModifyRepositoryPermission, @@ -16,6 +16,8 @@ from auth.permissions import (ReadRepositoryPermission, ModifyRepositoryPermissi from data import model from util.http import abort from util.saas.metricqueue import time_blueprint +from util import get_app_url +from app import app logger = logging.getLogger(__name__) @@ -23,6 +25,16 @@ v2_bp = Blueprint('v2', __name__) time_blueprint(v2_bp, metric_queue) +@v2_bp.app_errorhandler(V2RegistryException) +def handle_registry_v2_exception(error): + response = jsonify({ + 'errors': [error.as_dict()] + }) + response.status_code = error.http_status_code + logger.debug('sending response: %s', response.get_data()) + return response + + def _require_repo_permission(permission_class, allow_public=False): def wrapper(func): @wraps(func) @@ -49,7 +61,20 @@ def get_input_stream(flask_request): return flask_request.stream +# TODO remove when v2 is deployed everywhere +def route_show_if(value): + def decorator(f): + @wraps(f) + def decorated_function(*args, **kwargs): + if not value: + abort(404) + + return f(*args, **kwargs) + return decorated_function + return decorator + @v2_bp.route('/') +@route_show_if(features.ADVERTISE_V2) @process_jwt_auth @anon_allowed def v2_support_enabled(): @@ -57,9 +82,11 @@ def v2_support_enabled(): if get_grant_user_context() is None: response = make_response('true', 401) - realm_hostname = urlparse(request.url).netloc realm_auth_path = url_for('v2.generate_registry_jwt') - authenticate = 'Bearer realm="{0}{1}",service="quay"'.format(realm_hostname, realm_auth_path) + + authenticate = 'Bearer realm="{0}{1}",service="{2}"'.format(get_app_url(app.config), + realm_auth_path, + app.config['SERVER_HOSTNAME']) response.headers['WWW-Authenticate'] = authenticate response.headers['Docker-Distribution-API-Version'] = 'registry/2.0' @@ -69,3 +96,5 @@ def v2_support_enabled(): from endpoints.v2 import v2auth from endpoints.v2 import manifest from endpoints.v2 import blob +from endpoints.v2 import tag +from endpoints.v2 import catalog diff --git a/endpoints/v2/blob.py b/endpoints/v2/blob.py index 1c5639ab0..58598f464 100644 --- a/endpoints/v2/blob.py +++ 
b/endpoints/v2/blob.py @@ -1,17 +1,18 @@ -# XXX This code is not yet ready to be run in production, and should remain disabled until such -# XXX time as this notice is removed. - import logging +import re -from flask import make_response, url_for, request +from flask import make_response, url_for, request, redirect, Response, abort as flask_abort from app import storage, app -from data import model +from data import model, database from digest import digest_tools from endpoints.v2 import v2_bp, require_repo_read, require_repo_write, get_input_stream +from endpoints.v2.errors import BlobUnknown, BlobUploadInvalid, BlobUploadUnknown, Unsupported from auth.jwt_auth import process_jwt_auth from endpoints.decorators import anon_protect -from util.http import abort +from util.cache import cache_control +from util.registry.filelike import wrap_with_handler, StreamSlice +from storage.basestorage import InvalidChunkException logger = logging.getLogger(__name__) @@ -19,29 +20,78 @@ logger = logging.getLogger(__name__) BASE_BLOB_ROUTE = '///blobs/' BLOB_DIGEST_ROUTE = BASE_BLOB_ROUTE.format(digest_tools.DIGEST_PATTERN) +RANGE_HEADER_REGEX = re.compile(r'^bytes=([0-9]+)-([0-9]+)$') + + +class _InvalidRangeHeader(Exception): + pass + + +def _base_blob_fetch(namespace, repo_name, digest): + """ Some work that is common to both GET and HEAD requests. Callers MUST check for proper + authorization before calling this method. + """ + try: + found = model.blob.get_repo_blob_by_digest(namespace, repo_name, digest) + except model.BlobDoesNotExist: + raise BlobUnknown() + + headers = { + 'Docker-Content-Digest': digest, + 'Content-Length': found.image_size, + } + + # Add the Accept-Ranges header if the storage engine supports resumable + # downloads. + if storage.get_supports_resumable_downloads(found.locations): + logger.debug('Storage supports resumable downloads') + headers['Accept-Ranges'] = 'bytes' + + return found, headers @v2_bp.route(BLOB_DIGEST_ROUTE, methods=['HEAD']) @process_jwt_auth @require_repo_read @anon_protect -def check_blob_existence(namespace, repo_name, digest): - try: - found = model.blob.get_repo_blob_by_digest(namespace, repo_name, digest) +@cache_control(max_age=31436000) +def check_blob_exists(namespace, repo_name, digest): + _, headers = _base_blob_fetch(namespace, repo_name, digest) - # The response body must be empty for a successful HEAD request - return make_response('') - except model.BlobDoesNotExist: - abort(404) + response = make_response('') + response.headers.extend(headers) + response.headers['Content-Length'] = headers['Content-Length'] + return response @v2_bp.route(BLOB_DIGEST_ROUTE, methods=['GET']) @process_jwt_auth @require_repo_read @anon_protect +@cache_control(max_age=31536000) def download_blob(namespace, repo_name, digest): - # TODO Implement this - return make_response('') + found, headers = _base_blob_fetch(namespace, repo_name, digest) + + path = model.storage.get_layer_path(found) + logger.debug('Looking up the direct download URL for path: %s', path) + direct_download_url = storage.get_direct_download_url(found.locations, path) + + if direct_download_url: + logger.debug('Returning direct download URL') + resp = redirect(direct_download_url) + resp.headers.extend(headers) + return resp + + logger.debug('Streaming layer data') + + # Close the database handle here for this process before we send the long download. 
+ database.close_db_filter(None) + + return Response(storage.stream_read(found.locations, path), headers=headers) + + +def _render_range(num_uploaded_bytes, with_bytes_prefix=True): + return '{0}0-{1}'.format('bytes=' if with_bytes_prefix else '', num_uploaded_bytes - 1) @v2_bp.route('///blobs/uploads/', methods=['POST']) @@ -49,12 +99,162 @@ def download_blob(namespace, repo_name, digest): @require_repo_write @anon_protect def start_blob_upload(namespace, repo_name): - new_upload_uuid = storage.initiate_chunked_upload(storage.preferred_locations[0]) - accepted = make_response('', 202) - accepted.headers['Location'] = url_for('v2.upload_chunk', namespace=namespace, - repo_name=repo_name, upload_uuid=new_upload_uuid) - accepted.headers['Range'] = 'bytes=0-0' - accepted.headers['Docker-Upload-UUID'] = new_upload_uuid + location_name = storage.preferred_locations[0] + new_upload_uuid, upload_metadata = storage.initiate_chunked_upload(location_name) + model.blob.initiate_upload(namespace, repo_name, new_upload_uuid, location_name, upload_metadata) + + digest = request.args.get('digest', None) + if digest is None: + # The user will send the blob data in another request + accepted = make_response('', 202) + accepted.headers['Location'] = url_for('v2.upload_chunk', namespace=namespace, + repo_name=repo_name, upload_uuid=new_upload_uuid) + accepted.headers['Range'] = _render_range(0) + accepted.headers['Docker-Upload-UUID'] = new_upload_uuid + return accepted + else: + # The user plans to send us the entire body right now + uploaded = _upload_chunk(namespace, repo_name, new_upload_uuid) + uploaded.save() + + return _finish_upload(namespace, repo_name, uploaded, digest) + + +@v2_bp.route('///blobs/uploads/', methods=['GET']) +@process_jwt_auth +@require_repo_write +@anon_protect +def fetch_existing_upload(namespace, repo_name, upload_uuid): + try: + found = model.blob.get_blob_upload(namespace, repo_name, upload_uuid) + except model.InvalidBlobUpload: + raise BlobUploadUnknown() + + # Note: Docker byte ranges are exclusive so we have to add one to the byte count. + accepted = make_response('', 204) + accepted.headers['Range'] = _render_range(found.byte_count + 1) + accepted.headers['Docker-Upload-UUID'] = upload_uuid + return accepted + + +def _current_request_path(): + return '{0}{1}'.format(request.script_root, request.path) + + +def _range_not_satisfiable(valid_end): + invalid_range = make_response('', 416) + invalid_range.headers['Location'] = _current_request_path() + invalid_range.headers['Range'] = '0-{0}'.format(valid_end) + invalid_range.headers['Docker-Upload-UUID'] = request.view_args['upload_uuid'] + flask_abort(invalid_range) + + +def _parse_range_header(range_header_text): + """ Parses the range header, and returns a tuple of the start offset and the length, + or raises an _InvalidRangeHeader exception. + """ + found = RANGE_HEADER_REGEX.match(range_header_text) + if found is None: + raise _InvalidRangeHeader() + + start = int(found.group(1)) + length = int(found.group(2)) - start + + if length <= 0: + raise _InvalidRangeHeader() + + return (start, length) + + +def _upload_chunk(namespace, repo_name, upload_uuid): + """ Common code among the various uploading paths for appending data to blobs. + Callers MUST call .save() or .delete_instance() on the returned database object. 
+ """ + try: + found = model.blob.get_blob_upload(namespace, repo_name, upload_uuid) + except model.InvalidBlobUpload: + raise BlobUploadUnknown() + + start_offset, length = 0, -1 + range_header = request.headers.get('range', None) + if range_header is not None: + try: + start_offset, length = _parse_range_header(range_header) + except _InvalidRangeHeader: + _range_not_satisfiable(found.byte_count) + + if start_offset > 0 and start_offset > found.byte_count: + _range_not_satisfiable(found.byte_count) + + input_fp = get_input_stream(request) + + if start_offset > 0 and start_offset < found.byte_count: + # Skip the bytes which were received on a previous push, which are already stored and + # included in the sha calculation + input_fp = StreamSlice(input_fp, found.byte_count - start_offset) + start_offset = found.byte_count + + input_fp = wrap_with_handler(input_fp, found.sha_state.update) + + try: + length_written, new_metadata = storage.stream_upload_chunk({found.location.name}, upload_uuid, + start_offset, length, input_fp, + found.storage_metadata) + except InvalidChunkException: + _range_not_satisfiable(found.byte_count) + + found.storage_metadata = new_metadata + found.byte_count += length_written + return found + + +def _finish_upload(namespace, repo_name, upload_obj, expected_digest): + # Verify that the digest's SHA matches that of the uploaded data. + computed_digest = digest_tools.sha256_digest_from_hashlib(upload_obj.sha_state) + if not digest_tools.digests_equal(computed_digest, expected_digest): + raise BlobUploadInvalid() + + # Move the storage into place, or if this was a re-upload, cancel it + final_blob_location = digest_tools.content_path(expected_digest) + + if storage.exists({upload_obj.location.name}, final_blob_location): + # It already existed, clean up our upload which served as proof that we had the file + storage.cancel_chunked_upload({upload_obj.location.name}, upload_obj.uuid, + upload_obj.storage_metadata) + + else: + # We were the first ones to upload this image (at least to this location) + # Let's copy it into place + storage.complete_chunked_upload({upload_obj.location.name}, upload_obj.uuid, + final_blob_location, upload_obj.storage_metadata) + + # Mark the blob as uploaded. + model.blob.store_blob_record_and_temp_link(namespace, repo_name, expected_digest, + upload_obj.location, upload_obj.byte_count, + app.config['PUSH_TEMP_TAG_EXPIRATION_SEC']) + + # Delete the upload tracking row. 
+ upload_obj.delete_instance() + + response = make_response('', 201) + response.headers['Docker-Content-Digest'] = expected_digest + response.headers['Location'] = url_for('v2.download_blob', namespace=namespace, + repo_name=repo_name, digest=expected_digest) + return response + + +@v2_bp.route('///blobs/uploads/', methods=['PATCH']) +@process_jwt_auth +@require_repo_write +@anon_protect +def upload_chunk(namespace, repo_name, upload_uuid): + upload = _upload_chunk(namespace, repo_name, upload_uuid) + upload.save() + + accepted = make_response('', 204) + accepted.headers['Location'] = _current_request_path() + accepted.headers['Range'] = _render_range(upload.byte_count, with_bytes_prefix=False) + accepted.headers['Docker-Upload-UUID'] = upload_uuid return accepted @@ -62,22 +262,40 @@ def start_blob_upload(namespace, repo_name): @process_jwt_auth @require_repo_write @anon_protect -def upload_chunk(namespace, repo_name, upload_uuid): +def monolithic_upload_or_last_chunk(namespace, repo_name, upload_uuid): digest = request.args.get('digest', None) - upload_location = storage.preferred_locations[0] - bytes_written = storage.stream_upload_chunk(upload_location, upload_uuid, 0, -1, - get_input_stream(request)) + if digest is None: + raise BlobUploadInvalid() - if digest is not None: - final_blob_location = digest_tools.content_path(digest) - storage.complete_chunked_upload(upload_location, upload_uuid, final_blob_location, digest) - model.blob.store_blob_record_and_temp_link(namespace, repo_name, digest, upload_location, - app.config['PUSH_TEMP_TAG_EXPIRATION_SEC']) + found = _upload_chunk(namespace, repo_name, upload_uuid) + return _finish_upload(namespace, repo_name, found, digest) + + +@v2_bp.route('///blobs/uploads/', methods=['DELETE']) +@process_jwt_auth +@require_repo_write +@anon_protect +def cancel_upload(namespace, repo_name, upload_uuid): + try: + found = model.blob.get_blob_upload(namespace, repo_name, upload_uuid) + except model.InvalidBlobUpload: + raise BlobUploadUnknown() + + # We delete the record for the upload first, since if the partial upload in + # storage fails to delete, it doesn't break anything + found.delete_instance() + storage.cancel_chunked_upload({found.location.name}, found.uuid, found.storage_metadata) + + return make_response('', 204) + + + +@v2_bp.route('///blobs/', methods=['DELETE']) +@process_jwt_auth +@require_repo_write +@anon_protect +def delete_digest(namespace, repo_name, upload_uuid): + # We do not support deleting arbitrary digests, as they break repo images. 
+ raise Unsupported() - response = make_response('', 201) - response.headers['Docker-Content-Digest'] = digest - response.headers['Location'] = url_for('v2.download_blob', namespace=namespace, - repo_name=repo_name, digest=digest) - return response - return make_response('', 202) diff --git a/endpoints/v2/catalog.py b/endpoints/v2/catalog.py new file mode 100644 index 000000000..c483a8d41 --- /dev/null +++ b/endpoints/v2/catalog.py @@ -0,0 +1,29 @@ +from flask import jsonify, url_for + +from endpoints.v2 import v2_bp +from auth.auth import process_auth +from endpoints.decorators import anon_protect +from data import model +from endpoints.v2.v2util import add_pagination +from auth.auth_context import get_authenticated_user + +@v2_bp.route('/_catalog', methods=['GET']) +@process_auth +@anon_protect +def catalog_search(): + url = url_for('v2.catalog_search') + + username = get_authenticated_user().username if get_authenticated_user() else None + query = model.repository.get_visible_repositories(username, include_public=(username is None), + limit=50) + + link, query = add_pagination(query, url) + + response = jsonify({ + 'repositories': ['%s/%s' % (repo.namespace_user.username, repo.name) for repo in query], + }) + + if link is not None: + response.headers['Link'] = link + + return response diff --git a/endpoints/v2/errors.py b/endpoints/v2/errors.py new file mode 100644 index 000000000..f24905f68 --- /dev/null +++ b/endpoints/v2/errors.py @@ -0,0 +1,118 @@ +class V2RegistryException(Exception): + def __init__(self, error_code_str, message, detail, http_status_code=400): + super(V2RegistryException, self).__init__(message) + self.http_status_code = http_status_code + + self._error_code_str = error_code_str + self._detail = detail + + def as_dict(self): + return { + 'code': self._error_code_str, + 'message': self.message, + 'detail': self._detail if self._detail is not None else {}, + } + + +class BlobUnknown(V2RegistryException): + def __init__(self, detail=None): + super(BlobUnknown, self).__init__('BLOB_UNKNOWN', + 'blob unknown to registry', + detail, + 404) + + +class BlobUploadInvalid(V2RegistryException): + def __init__(self, detail=None): + super(BlobUploadInvalid, self).__init__('BLOB_UPLOAD_INVALID', + 'blob upload invalid', + detail) + + +class BlobUploadUnknown(V2RegistryException): + def __init__(self, detail=None): + super(BlobUploadUnknown, self).__init__('BLOB_UPLOAD_UNKNOWN', + 'blob upload unknown to registry', + detail, + 404) + + +class DigestInvalid(V2RegistryException): + def __init__(self, detail=None): + super(DigestInvalid, self).__init__('DIGEST_INVALID', + 'provided digest did not match uploaded content', + detail) + + +class ManifestBlobUnknown(V2RegistryException): + def __init__(self, detail=None): + super(ManifestBlobUnknown, self).__init__('MANIFEST_BLOB_UNKNOWN', + 'manifest blob unknown to registry', + detail) + + +class ManifestInvalid(V2RegistryException): + def __init__(self, detail=None): + super(ManifestInvalid, self).__init__('MANIFEST_INVALID', + 'manifest invalid', + detail) + + +class ManifestUnknown(V2RegistryException): + def __init__(self, detail=None): + super(ManifestUnknown, self).__init__('MANIFEST_UNKNOWN', + 'manifest unknown', + detail, + 404) + + +class ManifestUnverified(V2RegistryException): + def __init__(self, detail=None): + super(ManifestUnverified, self).__init__('MANIFEST_UNVERIFIED', + 'manifest failed signature verification', + detail) + + +class NameInvalid(V2RegistryException): + def __init__(self, detail=None): + 
super(NameInvalid, self).__init__('NAME_INVALID', + 'invalid repository name', + detail) + + +class NameUnknown(V2RegistryException): + def __init__(self, detail=None): + super(NameUnknown, self).__init__('NAME_UNKNOWN', + 'repository name not known to registry', + detail, + 404) + + +class SizeInvalid(V2RegistryException): + def __init__(self, detail=None): + super(SizeInvalid, self).__init__('SIZE_INVALID', + 'provided length did not match content length', + detail) + + +class TagInvalid(V2RegistryException): + def __init__(self, detail=None): + super(TagInvalid, self).__init__('TAG_INVALID', + 'manifest tag did not match URI', + detail) + + +class Unauthorized(V2RegistryException): + def __init__(self, detail=None): + super(Unauthorized, self).__init__('UNAUTHORIZED', + 'access to the requested resource is not authorized', + detail, + 401) + + +class Unsupported(V2RegistryException): + def __init__(self, detail=None): + super(Unsupported, self).__init__('UNSUPPORTED', + 'The operation is unsupported.', + detail, + 405) diff --git a/endpoints/v2/manifest.py b/endpoints/v2/manifest.py index 10868f3c9..935045734 100644 --- a/endpoints/v2/manifest.py +++ b/endpoints/v2/manifest.py @@ -1,47 +1,79 @@ -# XXX This code is not yet ready to be run in production, and should remain disabled until such -# XXX time as this notice is removed. - import logging -import re import jwt.utils -import yaml +import json -from flask import make_response, request +from flask import make_response, request, url_for +from collections import namedtuple, OrderedDict +from jwkest.jws import SIGNER_ALGS, keyrep +from datetime import datetime -from app import storage +from app import docker_v2_signing_key from auth.jwt_auth import process_jwt_auth from endpoints.decorators import anon_protect -from endpoints.v2 import v2_bp, require_repo_read, require_repo_write, get_input_stream +from endpoints.v2 import v2_bp, require_repo_read, require_repo_write +from endpoints.v2.errors import (BlobUnknown, ManifestInvalid, ManifestUnverified, + ManifestUnknown, TagInvalid, NameInvalid) +from endpoints.trackhelper import track_and_log +from endpoints.notificationhelper import spawn_notification from digest import digest_tools +from data import model logger = logging.getLogger(__name__) VALID_TAG_PATTERN = r'[\w][\w.-]{0,127}' -VALID_TAG_REGEX = re.compile(VALID_TAG_PATTERN) + +BASE_MANIFEST_ROUTE = '///manifests/' +MANIFEST_DIGEST_ROUTE = BASE_MANIFEST_ROUTE.format(digest_tools.DIGEST_PATTERN) +MANIFEST_TAGNAME_ROUTE = BASE_MANIFEST_ROUTE.format(VALID_TAG_PATTERN) + + +ISO_DATETIME_FORMAT_ZULU = '%Y-%m-%dT%H:%M:%SZ' +JWS_ALGORITHM = 'RS256' + + +ImageMetadata = namedtuple('ImageMetadata', ['digest', 'v1_metadata', 'v1_metadata_str']) +ExtractedV1Metadata = namedtuple('ExtractedV1Metadata', ['docker_id', 'parent', 'created', + 'comment', 'command']) + + +_SIGNATURES_KEY = 'signatures' +_PROTECTED_KEY = 'protected' +_FORMAT_LENGTH_KEY = 'formatLength' +_FORMAT_TAIL_KEY = 'formatTail' +_REPO_NAME_KEY = 'name' +_REPO_TAG_KEY = 'tag' +_FS_LAYERS_KEY = 'fsLayers' +_HISTORY_KEY = 'history' +_BLOB_SUM_KEY = 'blobSum' +_V1_COMPAT_KEY = 'v1Compatibility' +_ARCH_KEY = 'architecture' +_SCHEMA_VER = 'schemaVersion' class SignedManifest(object): - SIGNATURES_KEY = 'signatures' - PROTECTED_KEY = 'protected' - FORMAT_LENGTH_KEY = 'formatLength' - FORMAT_TAIL_KEY = 'formatTail' - REPO_NAME_KEY = 'name' - REPO_TAG_KEY = 'tag' def __init__(self, manifest_bytes): self._bytes = manifest_bytes - parsed = yaml.safe_load(manifest_bytes) - 
self._signatures = parsed[self.SIGNATURES_KEY] - self._namespace, self._repo_name = parsed[self.REPO_NAME_KEY].split('/') - self._tag = parsed[self.REPO_TAG_KEY] + self._parsed = json.loads(manifest_bytes) + self._signatures = self._parsed[_SIGNATURES_KEY] + self._namespace, self._repo_name = self._parsed[_REPO_NAME_KEY].split('/') + self._tag = self._parsed[_REPO_TAG_KEY] self._validate() def _validate(self): - pass + for signature in self._signatures: + bytes_to_verify = '{0}.{1}'.format(signature['protected'], jwt.utils.base64url_encode(self.payload)) + signer = SIGNER_ALGS[signature['header']['alg']] + key = keyrep(signature['header']['jwk']) + gk = key.get_key() + sig = jwt.utils.base64url_decode(signature['signature'].encode('utf-8')) + verified = signer.verify(bytes_to_verify, sig, gk) + if not verified: + raise ValueError('manifest file failed signature verification') @property def signatures(self): @@ -59,52 +91,306 @@ class SignedManifest(object): def tag(self): return self._tag + @property + def bytes(self): + return self._bytes + + @property + def digest(self): + return digest_tools.sha256_digest(self.payload) + + @property + def layers(self): + """ Returns a generator of objects that have the blobSum and v1Compatibility keys in them, + starting from the leaf image and working toward the base node. + """ + for blob_sum_obj, history_obj in reversed(zip(self._parsed[_FS_LAYERS_KEY], + self._parsed[_HISTORY_KEY])): + + try: + image_digest = digest_tools.Digest.parse_digest(blob_sum_obj[_BLOB_SUM_KEY]) + except digest_tools.InvalidDigestException: + raise ManifestInvalid() + + metadata_string = history_obj[_V1_COMPAT_KEY] + + v1_metadata = json.loads(metadata_string) + command_list = v1_metadata.get('container_config', {}).get('Cmd', None) + command = json.dumps(command_list) if command_list else None + + extracted = ExtractedV1Metadata(v1_metadata['id'], v1_metadata.get('parent'), + v1_metadata.get('created'), v1_metadata.get('comment'), + command) + yield ImageMetadata(image_digest, extracted, metadata_string) + @property def payload(self): - protected = self._signatures[0][self.PROTECTED_KEY] - parsed_protected = yaml.safe_load(jwt.utils.base64url_decode(protected)) + protected = str(self._signatures[0][_PROTECTED_KEY]) + + parsed_protected = json.loads(jwt.utils.base64url_decode(protected)) logger.debug('parsed_protected: %s', parsed_protected) - signed_content_head = self._bytes[:parsed_protected[self.FORMAT_LENGTH_KEY]] + + signed_content_head = self._bytes[:parsed_protected[_FORMAT_LENGTH_KEY]] logger.debug('signed content head: %s', signed_content_head) - signed_content_tail = jwt.utils.base64url_decode(parsed_protected[self.FORMAT_TAIL_KEY]) + + signed_content_tail = jwt.utils.base64url_decode(str(parsed_protected[_FORMAT_TAIL_KEY])) logger.debug('signed content tail: %s', signed_content_tail) return signed_content_head + signed_content_tail -@v2_bp.route('///manifests/', - methods=['GET']) +class SignedManifestBuilder(object): + """ Class which represents a manifest which is currently being built. 
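# Illustrative sketch (not part of this change): the payload rule used by
# SignedManifest.payload above, using only the stdlib in place of jwt.utils.
import base64
import json

def base64url_decode(data):
  # Restore the padding stripped by base64url encoding before decoding.
  return base64.urlsafe_b64decode(str(data) + '=' * (-len(data) % 4))

def signed_payload(manifest_bytes, protected_b64):
  # The signed bytes are the raw manifest up to 'formatLength' plus the
  # base64url-decoded 'formatTail' taken from the protected header.
  protected = json.loads(base64url_decode(protected_b64))
  return manifest_bytes[:protected['formatLength']] + base64url_decode(protected['formatTail'])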
+ """ + def __init__(self, namespace, repo_name, tag, architecture='amd64', schema_ver=1): + self._base_payload = { + _REPO_TAG_KEY: tag, + _REPO_NAME_KEY: '{0}/{1}'.format(namespace, repo_name), + _ARCH_KEY: architecture, + _SCHEMA_VER: schema_ver, + } + + self._fs_layer_digests = [] + self._history = [] + + def add_layer(self, layer_digest, v1_json_metadata): + self._fs_layer_digests.append({ + _BLOB_SUM_KEY: layer_digest, + }) + self._history.append({ + _V1_COMPAT_KEY: v1_json_metadata, + }) + + def build(self, json_web_key): + """ Build the payload and sign it, returning a SignedManifest object. + """ + payload = OrderedDict(self._base_payload) + payload.update({ + _HISTORY_KEY: self._history, + _FS_LAYERS_KEY: self._fs_layer_digests, + }) + + payload_str = json.dumps(payload, indent=3) + + split_point = payload_str.rfind('\n}') + + protected_payload = { + 'formatTail': jwt.utils.base64url_encode(payload_str[split_point:]), + 'formatLength': split_point, + 'time': datetime.utcnow().strftime(ISO_DATETIME_FORMAT_ZULU), + } + protected = jwt.utils.base64url_encode(json.dumps(protected_payload)) + logger.debug('Generated protected block: %s', protected) + + bytes_to_sign = '{0}.{1}'.format(protected, jwt.utils.base64url_encode(payload_str)) + + signer = SIGNER_ALGS[JWS_ALGORITHM] + signature = jwt.utils.base64url_encode(signer.sign(bytes_to_sign, json_web_key.get_key())) + logger.debug('Generated signature: %s', signature) + + public_members = set(json_web_key.public_members) + public_key = {comp: value for comp, value in json_web_key.to_dict().items() + if comp in public_members} + + signature_block = { + 'header': { + 'jwk': public_key, + 'alg': JWS_ALGORITHM, + }, + 'signature': signature, + _PROTECTED_KEY: protected, + } + + logger.debug('Encoded signature block: %s', json.dumps(signature_block)) + + payload.update({ + _SIGNATURES_KEY: [signature_block], + }) + + return SignedManifest(json.dumps(payload, indent=3)) + + +@v2_bp.route(MANIFEST_TAGNAME_ROUTE, methods=['GET']) @process_jwt_auth @require_repo_read @anon_protect -def fetch_manifest_by_tagname(namespace, repo_name, tag_name): - logger.debug('Fetching tag manifest with name: %s', tag_name) - return make_response('Manifest {0}'.format(tag_name)) +def fetch_manifest_by_tagname(namespace, repo_name, manifest_ref): + try: + manifest = model.tag.load_tag_manifest(namespace, repo_name, manifest_ref) + except model.InvalidManifestException: + try: + manifest = _generate_and_store_manifest(namespace, repo_name, manifest_ref) + except model.DataModelException: + logger.exception('Exception when generating manifest for %s/%s:%s', namespace, repo_name, + manifest_ref) + raise ManifestUnknown() + + return make_response(manifest.json_data, 200) -@v2_bp.route('///manifests/', - methods=['PUT']) +@v2_bp.route(MANIFEST_DIGEST_ROUTE, methods=['GET']) +@process_jwt_auth +@require_repo_read +@anon_protect +def fetch_manifest_by_digest(namespace, repo_name, manifest_ref): + try: + manifest = model.tag.load_manifest_by_digest(namespace, repo_name, manifest_ref) + except model.InvalidManifestException: + # Without a tag name to reference, we can't make an attempt to generate the manifest + raise ManifestUnknown() + + return make_response(manifest.json_data, 200) + + +@v2_bp.route(MANIFEST_TAGNAME_ROUTE, methods=['PUT']) @process_jwt_auth @require_repo_write @anon_protect -def write_manifest_by_tagname(namespace, repo_name, tag_name): - manifest = SignedManifest(request.data) - manifest_digest = digest_tools.sha256_digest(manifest.payload) +def 
write_manifest_by_tagname(namespace, repo_name, manifest_ref): + try: + manifest = SignedManifest(request.data) + except ValueError: + raise ManifestInvalid() + + if manifest.tag != manifest_ref: + raise TagInvalid() + + return _write_manifest(namespace, repo_name, manifest) + + +@v2_bp.route(MANIFEST_DIGEST_ROUTE, methods=['PUT']) +@process_jwt_auth +@require_repo_write +@anon_protect +def write_manifest_by_digest(namespace, repo_name, manifest_ref): + try: + manifest = SignedManifest(request.data) + except ValueError: + raise ManifestInvalid() + + if manifest.digest != manifest_ref: + raise ManifestInvalid() + + return _write_manifest(namespace, repo_name, manifest) + + +def _write_manifest(namespace, repo_name, manifest): + # Ensure that the manifest is for this repository. + if manifest.namespace != namespace or manifest.repo_name != repo_name: + raise NameInvalid() + + # Ensure that the repository exists. + repo = model.repository.get_repository(namespace, repo_name) + if repo is None: + raise NameInvalid() + + # Lookup all the images and their parent images (if any) inside the manifest. This will let us + # know which V1 images we need to synthesize and which ones are invalid. + layers = list(manifest.layers) + + docker_image_ids = [mdata.v1_metadata.docker_id for mdata in layers] + parent_image_ids = [mdata.v1_metadata.parent for mdata in layers + if mdata.v1_metadata.parent] + all_image_ids = list(set(docker_image_ids + parent_image_ids)) + + images_query = model.image.lookup_repository_images(repo, all_image_ids) + images_map = {image.docker_image_id: image for image in images_query} + + # Lookup the storages associated with each blob in the manifest. + checksums = [str(mdata.digest) for mdata in manifest.layers] + storage_query = model.storage.lookup_repo_storages_by_content_checksum(repo, checksums) + storage_map = {storage.content_checksum: storage for storage in storage_query} + + # Synthesize the V1 metadata for each layer. + manifest_digest = manifest.digest + tag_name = manifest.tag + + for mdata in layers: + digest_str = str(mdata.digest) + v1_mdata = mdata.v1_metadata + + # If there is already a V1 image for this layer, nothing more to do. + if v1_mdata.docker_id in images_map: + continue + + # Lookup the parent image for the layer, if any. + parent_image = None + if v1_mdata.parent is not None: + parent_image = images_map.get(v1_mdata.parent) + if parent_image is None: + msg = 'Parent not found with docker image id {0}'.format(v1_mdata.parent) + raise ManifestInvalid(detail={'message': msg}) + + # Synthesize and store the v1 metadata in the db. + blob_storage = storage_map.get(digest_str) + if blob_storage is None: + raise BlobUnknown(detail={'digest': digest_str}) + + image = model.image.synthesize_v1_image(repo, blob_storage, v1_mdata.docker_id, + v1_mdata.created, v1_mdata.comment, v1_mdata.command, + mdata.v1_metadata_str, parent_image) + + images_map[v1_mdata.docker_id] = image + + if not layers: + # The manifest doesn't actually reference any layers! + raise ManifestInvalid(detail={'message': 'manifest does not reference any layers'}) + + # Store the manifest pointing to the tag. + leaf_layer = layers[-1] + model.tag.store_tag_manifest(namespace, repo_name, tag_name, leaf_layer.v1_metadata.docker_id, + manifest_digest, request.data) + + # Spawn the repo_push event. 
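# Illustrative usage (not part of this change) of the SignedManifestBuilder defined
# above; the repository, tag, layer digest and V1 JSON are fake values, and a freshly
# generated RSAKey stands in for docker_v2_signing_key.
import json

from Crypto.PublicKey import RSA
from jwkest.jwk import RSAKey

signing_key = RSAKey(key=RSA.generate(2048))

builder = SignedManifestBuilder('devtable', 'simple', 'latest')
builder.add_layer('sha256:' + '0' * 64, json.dumps({'id': 'abc123'}))

signed = builder.build(signing_key)
print signed.digest        # content-addressable digest of the signed payload
print signed.tag           # 'latest'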
+ event_data = { + 'updated_tags': [tag_name], + } + + track_and_log('push_repo', repo) + spawn_notification(repo, 'repo_push', event_data) response = make_response('OK', 202) response.headers['Docker-Content-Digest'] = manifest_digest - response.headers['Location'] = 'https://fun.com' + response.headers['Location'] = url_for('v2.fetch_manifest_by_digest', namespace=namespace, + repo_name=repo_name, manifest_ref=manifest_digest) return response -# @v2_bp.route('///manifests/', -# methods=['PUT']) -# @process_jwt_auth -# @require_repo_write -# @anon_protect -# def write_manifest(namespace, repo_name, tag_digest): -# logger.debug('Writing tag manifest with name: %s', tag_digest) +@v2_bp.route(MANIFEST_DIGEST_ROUTE, methods=['DELETE']) +@process_jwt_auth +@require_repo_write +@anon_protect +def delete_manifest_by_digest(namespace, repo_name, manifest_ref): + """ Delete the manifest specified by the digest. Note: there is no equivalent + method for deleting by tag name because it is forbidden by the spec. + """ + try: + manifest = model.tag.load_manifest_by_digest(namespace, repo_name, manifest_ref) + except model.InvalidManifestException: + # Without a tag name to reference, we can't make an attempt to generate the manifest + raise ManifestUnknown() -# manifest_path = digest_tools.content_path(tag_digest) -# storage.stream_write('local_us', manifest_path, get_input_stream(request)) + manifest.delete_instance() -# return make_response('Manifest {0}'.format(tag_digest)) + return make_response('', 202) + + +def _generate_and_store_manifest(namespace, repo_name, tag_name): + # First look up the tag object and its ancestors + image = model.tag.get_tag_image(namespace, repo_name, tag_name) + parents = model.image.get_parent_images(namespace, repo_name, image) + + # Create and populate the manifest builder + builder = SignedManifestBuilder(namespace, repo_name, tag_name) + + # Add the leaf layer + builder.add_layer(image.storage.content_checksum, image.v1_json_metadata) + + for parent in parents: + builder.add_layer(parent.storage.content_checksum, parent.v1_json_metadata) + + # Sign the manifest with our signing key. 
+ manifest = builder.build(docker_v2_signing_key) + manifest_row = model.tag.associate_generated_tag_manifest(namespace, repo_name, tag_name, + manifest.digest, manifest.bytes) + + return manifest_row diff --git a/endpoints/v2/tag.py b/endpoints/v2/tag.py new file mode 100644 index 000000000..16ab87431 --- /dev/null +++ b/endpoints/v2/tag.py @@ -0,0 +1,32 @@ +from flask import jsonify, url_for + +from endpoints.v2 import v2_bp, require_repo_read +from endpoints.v2.errors import NameUnknown +from endpoints.v2.v2util import add_pagination +from auth.jwt_auth import process_jwt_auth +from endpoints.decorators import anon_protect +from data import model + +@v2_bp.route('///tags/list', methods=['GET']) +@process_jwt_auth +@require_repo_read +@anon_protect +def list_all_tags(namespace, repo_name): + repository = model.repository.get_repository(namespace, repo_name) + if repository is None: + raise NameUnknown() + + query = model.tag.list_repository_tags(namespace, repo_name) + + url = url_for('v2.list_all_tags', namespace=namespace, repo_name=repo_name) + link, query = add_pagination(query, url) + + response = jsonify({ + 'name': '{0}/{1}'.format(namespace, repo_name), + 'tags': [tag.name for tag in query], + }) + + if link is not None: + response.headers['Link'] = link + + return response diff --git a/endpoints/v2/v2auth.py b/endpoints/v2/v2auth.py index 7c05e10a0..4e48b4416 100644 --- a/endpoints/v2/v2auth.py +++ b/endpoints/v2/v2auth.py @@ -1,6 +1,3 @@ -# XXX This code is not yet ready to be run in production, and should remain disabled until such -# XXX time as this notice is removed. - import logging import re import time @@ -17,15 +14,17 @@ from auth.permissions import (ModifyRepositoryPermission, ReadRepositoryPermissi CreateRepositoryPermission) from endpoints.v2 import v2_bp from util.cache import no_cache -from util.names import parse_namespace_repository - +from util.names import parse_namespace_repository, REPOSITORY_NAME_REGEX +from endpoints.decorators import anon_protect logger = logging.getLogger(__name__) +TOKEN_VALIDITY_LIFETIME_S = 60 * 60 # 1 hour SCOPE_REGEX = re.compile( - r'repository:([\.a-zA-Z0-9_\-]+/[\.a-zA-Z0-9_\-]+):(((push|pull|\*),)*(push|pull|\*))' + r'^repository:([\.a-zA-Z0-9_\-]+/[\.a-zA-Z0-9_\-]+):(((push|pull|\*),)*(push|pull|\*))$' ) +ANONYMOUS_SUB = '(anonymous)' @lru_cache(maxsize=1) @@ -43,6 +42,7 @@ def load_private_key(private_key_file_path): @v2_bp.route('/auth') @process_auth @no_cache +@anon_protect def generate_registry_jwt(): """ This endpoint will generate a JWT conforming to the Docker registry v2 auth spec: https://docs.docker.com/registry/spec/auth/token/ @@ -54,13 +54,11 @@ def generate_registry_jwt(): logger.debug('Scope request: %s', scope_param) user = get_authenticated_user() - access = [] if scope_param is not None: match = SCOPE_REGEX.match(scope_param) - if match is None or match.end() != len(scope_param): + if match is None: logger.debug('Match: %s', match) - logger.debug('End: %s', match.end()) logger.debug('len: %s', len(scope_param)) logger.warning('Unable to decode repository and actions: %s', scope_param) abort(400) @@ -71,21 +69,29 @@ def generate_registry_jwt(): actions = match.group(2).split(',') namespace, reponame = parse_namespace_repository(namespace_and_repo) + + # Ensure that we are never creating an invalid repository. 
+ if not REPOSITORY_NAME_REGEX.match(reponame): + abort(400) + if 'pull' in actions and 'push' in actions: + if user is None: + abort(401) + repo = model.repository.get_repository(namespace, reponame) if repo: - if not ModifyRepositoryPermission(namespace, reponame): + if not ModifyRepositoryPermission(namespace, reponame).can(): abort(403) else: - if not CreateRepositoryPermission(namespace): + if not CreateRepositoryPermission(namespace).can(): abort(403) logger.debug('Creating repository: %s/%s', namespace, reponame) model.repository.create_repository(namespace, reponame, user) elif 'pull' in actions: - if not ReadRepositoryPermission(namespace, reponame): + if (not ReadRepositoryPermission(namespace, reponame).can() and + not model.repository.repository_is_public(namespace, reponame)): abort(403) - access.append({ 'type': 'repository', 'name': namespace_and_repo, @@ -93,11 +99,12 @@ def generate_registry_jwt(): }) token_data = { - 'iss': 'token-issuer', + 'iss': app.config['JWT_AUTH_TOKEN_ISSUER'], 'aud': audience_param, 'nbf': int(time.time()), - 'exp': int(time.time() + 60), - 'sub': user.username, + 'iat': int(time.time()), + 'exp': int(time.time() + TOKEN_VALIDITY_LIFETIME_S), + 'sub': user.username if user else ANONYMOUS_SUB, 'access': access, } diff --git a/endpoints/v2/v2util.py b/endpoints/v2/v2util.py new file mode 100644 index 000000000..78ffdd756 --- /dev/null +++ b/endpoints/v2/v2util.py @@ -0,0 +1,19 @@ +from flask import request +from app import get_app_url + +_MAX_RESULTS_PER_PAGE = 100 + +def add_pagination(query, url): + """ Adds optional pagination to the given query by looking for the Docker V2 pagination request + args. """ + limit = request.args.get('n', None) + page = request.args.get('page', 1) + + if limit is None: + return None, query + + limit = max(limit, _MAX_RESULTS_PER_PAGE) + url = get_app_url() + url + query = query.paginate(page, limit) + link = url + '?n=%s&last=%s; rel="next"' % (limit, page + 1) + return link, query diff --git a/endpoints/verbs.py b/endpoints/verbs.py index 4365d95a7..6db93a8b2 100644 --- a/endpoints/verbs.py +++ b/endpoints/verbs.py @@ -29,11 +29,7 @@ def _open_stream(formatter, namespace, repository, tag, synthetic_image_id, imag # the database. with database.UseThenDisconnect(app.config): image_list = list(model.image.get_parent_images(namespace, repository, repo_image)) - image_list.append(repo_image) - - # Note: The image list ordering must be from top-level image, downward, so we reverse the order - # here. 
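# Illustrative sketch (not part of this change): minting a bearer token with the same
# claim shape generate_registry_jwt() assembles above. PyJWT and a throwaway RSA key
# stand in for the endpoint's configured issuer, audience and private key.
import time

import jwt  # PyJWT
from Crypto.PublicKey import RSA

private_pem = RSA.generate(2048).exportKey()    # hypothetical signing key

token_data = {
  'iss': 'quay',                       # stand-in for app.config['JWT_AUTH_TOKEN_ISSUER']
  'aud': 'registry.example.com',       # the audience requested by the client
  'nbf': int(time.time()),
  'iat': int(time.time()),
  'exp': int(time.time()) + 60 * 60,   # TOKEN_VALIDITY_LIFETIME_S
  'sub': 'devtable',
  'access': [{
    'type': 'repository',
    'name': 'devtable/simple',
    'actions': ['pull', 'push'],
  }],
}

bearer_token = jwt.encode(token_data, private_pem, algorithm='RS256')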
- image_list.reverse() + image_list.insert(0, repo_image) def get_next_image(): for current_image in image_list: @@ -41,7 +37,7 @@ def _open_stream(formatter, namespace, repository, tag, synthetic_image_id, imag def get_next_layer(): for current_image_entry in image_list: - current_image_path = store.image_layer_path(current_image_entry.storage.uuid) + current_image_path = model.storage.get_layer_path(current_image_entry.storage) current_image_stream = store.stream_read_file(current_image_entry.storage.locations, current_image_path) @@ -51,7 +47,7 @@ def _open_stream(formatter, namespace, repository, tag, synthetic_image_id, imag yield current_image_stream stream = formatter.build_stream(namespace, repository, tag, synthetic_image_id, image_json, - get_next_image, get_next_layer) + get_next_image, get_next_layer, get_image_json) return stream.read @@ -90,10 +86,14 @@ def _write_synthetic_image_to_storage(verb, linked_storage_uuid, linked_location queue_file.add_exception_handler(handle_exception) - image_path = store.image_layer_path(linked_storage_uuid) + logger.debug('Starting write of synthetic image') + + image_path = store.v1_image_layer_path(linked_storage_uuid) store.stream_write(linked_locations, image_path, queue_file) queue_file.close() + logger.debug('Done writing synthetic image') + if not queue_file.raised_exception: # Setup the database (since this is a new process) and then disconnect immediately # once the operation completes. @@ -170,9 +170,9 @@ def _repo_verb(namespace, repository, tag, verb, formatter, sign=False, checker= derived = model.storage.find_or_create_derived_storage(repo_image.storage, verb, store.preferred_locations[0]) - if not derived.uploading: + if not derived.uploading and False: logger.debug('Derived %s image %s exists in storage', verb, derived.uuid) - derived_layer_path = store.image_layer_path(derived.uuid) + derived_layer_path = model.storage.get_layer_path(derived) download_url = store.get_direct_download_url(derived.locations, derived_layer_path) if download_url: logger.debug('Redirecting to download URL for derived %s image %s', verb, derived.uuid) @@ -184,8 +184,8 @@ def _repo_verb(namespace, repository, tag, verb, formatter, sign=False, checker= logger.debug('Sending cached derived %s image %s', verb, derived.uuid) return send_file(store.stream_read_file(derived.locations, derived_layer_path)) - # Load the ancestry for the image. - uuid = repo_image.storage.uuid + # Load the full image list for the image. + full_image_list = model.image.get_image_layers(repo_image) logger.debug('Building and returning derived %s image %s', verb, derived.uuid) diff --git a/formats/aci.py b/formats/aci.py index 718c35445..11a7a06ef 100644 --- a/formats/aci.py +++ b/formats/aci.py @@ -10,7 +10,7 @@ class ACIImage(TarImageFormatter): """ def stream_generator(self, namespace, repository, tag, synthetic_image_id, - layer_json, get_image_iterator, get_layer_iterator): + layer_json, get_image_iterator, get_layer_iterator, get_image_json): # ACI Format (.tar): # manifest - The JSON manifest # rootfs - The root file system diff --git a/formats/squashed.py b/formats/squashed.py index b26a069fd..c04b4aa7e 100644 --- a/formats/squashed.py +++ b/formats/squashed.py @@ -19,8 +19,13 @@ class SquashedDockerImage(TarImageFormatter): command. """ + # TODO(jschorr): Change this multiplier to reflect the 99%-tile of the actual difference between + # the uncompressed image size and the Size reported by Docker.
+ SIZE_MULTIPLIER = 2 + def stream_generator(self, namespace, repository, tag, synthetic_image_id, - layer_json, get_image_iterator, get_layer_iterator): + layer_json, get_image_iterator, get_layer_iterator, get_image_json): + # Docker import V1 Format (.tar): # repositories - JSON file containing a repo -> tag -> image map # {image ID folder}: @@ -52,7 +57,14 @@ class SquashedDockerImage(TarImageFormatter): # Yield the merged layer data's header. estimated_file_size = 0 for image in get_image_iterator(): - estimated_file_size += image.storage.uncompressed_size + # In V1 we have the actual uncompressed size, which is needed for back compat with + # older versions of Docker. + # In V2, we use the size given in the image JSON. + if image.storage.uncompressed_size: + estimated_file_size += image.storage.uncompressed_size + else: + image_json = get_image_json(image) + estimated_file_size += image_json.get('Size', 0) * SquashedDockerImage.SIZE_MULTIPLIER yield self.tar_file_header(synthetic_image_id + '/layer.tar', estimated_file_size) @@ -65,7 +77,8 @@ class SquashedDockerImage(TarImageFormatter): # If the yielded size is more than the estimated size (which is unlikely but possible), then # raise an exception since the tar header will be wrong. if yielded_size > estimated_file_size: - raise FileEstimationException() + message = "Expected %s bytes, found %s bytes" % (estimated_file_size, yielded_size) + raise FileEstimationException(message) # If the yielded size is less than the estimated size (which is likely), fill the rest with # zeros. diff --git a/formats/tarimageformatter.py b/formats/tarimageformatter.py index e88275b03..001c83831 100644 --- a/formats/tarimageformatter.py +++ b/formats/tarimageformatter.py @@ -5,16 +5,17 @@ class TarImageFormatter(object): """ Base class for classes which produce a TAR containing image and layer data. """ def build_stream(self, namespace, repository, tag, synthetic_image_id, layer_json, - get_image_iterator, get_layer_iterator): + get_image_iterator, get_layer_iterator, get_image_json): """ Builds and streams a synthetic .tar.gz that represents the formatted TAR created by this class's implementation. 
""" return GzipWrap(self.stream_generator(namespace, repository, tag, synthetic_image_id, layer_json, - get_image_iterator, get_layer_iterator)) + get_image_iterator, get_layer_iterator, + get_image_json)) def stream_generator(self, namespace, repository, tag, synthetic_image_id, - layer_json, get_image_iterator, get_layer_iterator): + layer_json, get_image_iterator, get_layer_iterator, get_image_json): raise NotImplementedError def tar_file(self, name, contents): @@ -45,4 +46,4 @@ class TarImageFormatter(object): info.type = tarfile.DIRTYPE # allow the directory to be readable by non-root users info.mode = 0755 - return info.tobuf() \ No newline at end of file + return info.tobuf() diff --git a/initdb.py b/initdb.py index ce51c1749..f75420911 100644 --- a/initdb.py +++ b/initdb.py @@ -4,6 +4,7 @@ import hashlib import random import calendar import os +import argparse from datetime import datetime, timedelta from peewee import (SqliteDatabase, create_model_tables, drop_model_tables, savepoint_sqlite, @@ -20,6 +21,7 @@ from data.database import (db, all_models, Role, TeamRole, Visibility, LoginServ from data import model from app import app, storage as store from storage.basestorage import StoragePaths +from endpoints.v2.manifest import _generate_and_store_manifest from workers import repositoryactioncounter @@ -88,8 +90,7 @@ def __create_subtree(repo, structure, creator_username, parent, tag_map): # Write some data for the storage. if os.environ.get('WRITE_STORAGE_FILES'): storage_paths = StoragePaths() - paths = [storage_paths.image_ancestry_path, - storage_paths.image_layer_path] + paths = [storage_paths.v1_image_layer_path] for path_builder in paths: path = path_builder(new_image.storage.uuid) @@ -111,7 +112,7 @@ def __create_subtree(repo, structure, creator_username, parent, tag_map): new_image = model.image.set_image_metadata(docker_image_id, repo.namespace_user.username, repo.name, str(creation_time), 'no comment', command, - v1_metadata, parent) + json.dumps(v1_metadata), parent) compressed_size = random.randrange(1, 1024 * 1024 * 1024) model.image.set_image_size(docker_image_id, repo.namespace_user.username, repo.name, @@ -134,6 +135,7 @@ def __create_subtree(repo, structure, creator_username, parent, tag_map): new_tag = model.tag.create_or_update_tag(repo.namespace_user.username, repo.name, tag_name, new_image.docker_image_id) + _generate_and_store_manifest(repo.namespace_user.username, repo.name, tag_name) tag_map[tag_name] = new_tag for tag_name in last_node_tags: @@ -349,7 +351,7 @@ def wipe_database(): drop_model_tables(all_models, fail_silently=True) -def populate_database(): +def populate_database(minimal=False): logger.debug('Populating the DB with test data.') new_user_1 = model.user.create_user('devtable', 'password', 'jschorr@devtable.com') @@ -357,6 +359,10 @@ def populate_database(): new_user_1.stripe_id = TEST_STRIPE_ID new_user_1.save() + if minimal: + logger.debug('Skipping most db population because user requested mininal db') + return + disabled_user = model.user.create_user('disabled', 'password', 'jschorr+disabled@devtable.com') disabled_user.verified = True disabled_user.enabled = False @@ -405,7 +411,8 @@ def populate_database(): 'to_date': formatdate(calendar.timegm(to_date.utctimetuple())), 'reason': 'database migration' } - model.notification.create_notification('maintenance', new_user_1, metadata=notification_metadata) + model.notification.create_notification('maintenance', new_user_1, + metadata=notification_metadata) __generate_repository(new_user_4, 
'randomrepo', 'Random repo repository.', False, @@ -643,7 +650,12 @@ def populate_database(): while repositoryactioncounter.count_repository_actions(): pass + if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Initialize the test database.') + parser.add_argument('--simple', action='store_true') + args = parser.parse_args() + log_level = getattr(logging, app.config['LOGGING_LEVEL']) logging.basicConfig(level=log_level) @@ -652,5 +664,4 @@ if __name__ == '__main__': initialize_database() - if app.config.get('POPULATE_DB_TEST_DATA', False): - populate_database() + populate_database(args.simple) diff --git a/registry.py b/registry.py index 492fb633f..df868242c 100644 --- a/registry.py +++ b/registry.py @@ -8,10 +8,10 @@ from app import app as application import endpoints.decorated from endpoints.v1 import v1_bp -# from endpoints.v2 import v2_bp +from endpoints.v2 import v2_bp if os.environ.get('DEBUGLOG') == 'true': logging.config.fileConfig('conf/logging_debug.conf', disable_existing_loggers=False) application.register_blueprint(v1_bp, url_prefix='/v1') -# application.register_blueprint(v2_bp, url_prefix='/v2') +application.register_blueprint(v2_bp, url_prefix='/v2') diff --git a/requirements-nover.txt b/requirements-nover.txt index e55df79f7..f20af7f67 100644 --- a/requirements-nover.txt +++ b/requirements-nover.txt @@ -53,7 +53,7 @@ python-keystoneclient Flask-Testing pyjwt toposort -rfc3987 pyjwkest +rfc3987 jsonpath-rw bintrees \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 1fe1e4248..82b47ec1d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,7 +9,7 @@ blinker==1.3 boto==2.38.0 cachetools==1.0.3 cffi==1.1.2 -cryptography==0.9.2 +cryptography==1.0.2 debtcollector==0.5.0 enum34==1.0.4 Flask==0.10.1 @@ -59,8 +59,8 @@ pyasn1==0.1.8 pycparser==2.14 pycrypto==2.6.1 pygpgme==0.3 +pyjwkest==1.0.3 PyJWT==1.4.0 -pyjwkest==1.0.1 PyMySQL==0.6.6 pyOpenSSL==0.15.1 PyPDF2==1.24 @@ -97,9 +97,10 @@ git+https://github.com/DevTable/aniso8601-fake.git git+https://github.com/DevTable/anunidecode.git git+https://github.com/DevTable/pygithub.git git+https://github.com/DevTable/container-cloud-config.git +git+https://github.com/coreos/mockldap.git git+https://github.com/coreos/py-bitbucket.git git+https://github.com/coreos/pyapi-gitlab.git@timeout -git+https://github.com/coreos/mockldap.git +git+https://github.com/coreos/resumablehashlib.git git+https://github.com/coreos/resumablehashlib.git git+https://github.com/DevTable/python-etcd.git@sslfix git+https://github.com/NateFerrero/oauth2lib.git diff --git a/storage/basestorage.py b/storage/basestorage.py index 0b822b054..f8ff49919 100644 --- a/storage/basestorage.py +++ b/storage/basestorage.py @@ -1,5 +1,7 @@ import tempfile +from digest.digest_tools import content_path +from util.registry.filelike import READ_UNTIL_END class StoragePaths(object): shared_images = 'sharedimages' @@ -16,39 +18,29 @@ class StoragePaths(object): return tmpf, fn - def image_path(self, storage_uuid): + def _image_path(self, storage_uuid): return '{0}/{1}/'.format(self.shared_images, storage_uuid) - def image_layer_path(self, storage_uuid): - base_path = self.image_path(storage_uuid) + def v1_image_layer_path(self, storage_uuid): + base_path = self._image_path(storage_uuid) return '{0}layer'.format(base_path) - def image_ancestry_path(self, storage_uuid): - base_path = self.image_path(storage_uuid) - return '{0}ancestry'.format(base_path) + def blob_path(self, digest_str): + return content_path(digest_str) def 
image_file_trie_path(self, storage_uuid): - base_path = self.image_path(storage_uuid) + base_path = self._image_path(storage_uuid) return '{0}files.trie'.format(base_path) def image_file_diffs_path(self, storage_uuid): - base_path = self.image_path(storage_uuid) + base_path = self._image_path(storage_uuid) return '{0}diffs.json'.format(base_path) class BaseStorage(StoragePaths): - """Storage is organized as follow: - $ROOT/images//json - $ROOT/images//layer - $ROOT/repositories/// - """ - - # Useful if we want to change those locations later without rewriting - # the code which uses Storage - repositories = 'repositories' - images = 'images' - # Set the IO buffer to 64kB - buffer_size = 64 * 1024 + def __init__(self): + # Set the IO buffer to 64kB + self.buffer_size = 64 * 1024 def setup(self): """ Called to perform any storage system setup. """ @@ -95,27 +87,57 @@ class BaseStorage(StoragePaths): def get_checksum(self, path): raise NotImplementedError + def stream_write_to_fp(self, in_fp, out_fp, num_bytes=READ_UNTIL_END): + """ Copy the specified number of bytes from the input file stream to the output stream. If + num_bytes < 0 copy until the stream ends. + """ + bytes_copied = 0 + while bytes_copied < num_bytes or num_bytes == READ_UNTIL_END: + size_to_read = min(num_bytes - bytes_copied, self.buffer_size) + if size_to_read < 0: + size_to_read = self.buffer_size + + try: + buf = in_fp.read(size_to_read) + if not buf: + break + out_fp.write(buf) + bytes_copied += len(buf) + except IOError: + break + + return bytes_copied + def copy_to(self, destination, path): raise NotImplementedError -class DigestInvalidException(RuntimeError): +class InvalidChunkException(RuntimeError): pass class BaseStorageV2(BaseStorage): def initiate_chunked_upload(self): - """ Start a new chunked upload, and return a handle with which the upload can be referenced. + """ Start a new chunked upload, returning the uuid and any associated storage metadata """ raise NotImplementedError - def stream_upload_chunk(self, uuid, offset, length, in_fp): + def stream_upload_chunk(self, uuid, offset, length, in_fp, storage_metadata): """ Upload the specified amount of data from the given file pointer to the chunked destination - specified, starting at the given offset. Returns the number of bytes written. + specified, starting at the given offset. Returns the number of bytes uploaded, and a new + version of the storage_metadata. Raises InvalidChunkException if the offset or length can + not be accepted. Pass length as -1 to upload as much data from the in_fp as possible. """ raise NotImplementedError - def complete_chunked_upload(self, uuid, final_path, digest_to_verify): + def complete_chunked_upload(self, uuid, final_path, storage_metadata): """ Complete the chunked upload and store the final results in the path indicated. + Returns nothing. + """ + raise NotImplementedError + + def cancel_chunked_upload(self, uuid, storage_metadata): + """ Cancel the chunked upload and clean up any outstanding partially uploaded data. + Returns nothing. 
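# Quick usage sketch (not part of this change) of the stream_write_to_fp() helper
# introduced above, copying a bounded number of bytes between in-memory file objects.
import cStringIO as StringIO

copier = BaseStorage()
source = StringIO.StringIO('0123456789')
destination = StringIO.StringIO()

assert copier.stream_write_to_fp(source, destination, 4) == 4
assert destination.getvalue() == '0123'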
""" raise NotImplementedError diff --git a/storage/cloud.py b/storage/cloud.py index cc2750dae..c1de05ad8 100644 --- a/storage/cloud.py +++ b/storage/cloud.py @@ -1,21 +1,30 @@ import cStringIO as StringIO import os import logging +import copy import boto.s3.connection +import boto.s3.multipart import boto.gs.connection import boto.s3.key import boto.gs.key from io import BufferedIOBase +from uuid import uuid4 +from collections import namedtuple +from util.registry import filelike +from storage.basestorage import BaseStorageV2, InvalidChunkException import app -from storage.basestorage import BaseStorage logger = logging.getLogger(__name__) +_PartUploadMetadata = namedtuple('_PartUploadMetadata', ['path', 'offset', 'length']) +_CHUNKS_KEY = 'chunks' + + class StreamReadKeyAsFile(BufferedIOBase): def __init__(self, key): self._key = key @@ -38,9 +47,13 @@ class StreamReadKeyAsFile(BufferedIOBase): self._key.close(fast=True) -class _CloudStorage(BaseStorage): +class _CloudStorage(BaseStorageV2): def __init__(self, connection_class, key_class, connect_kwargs, upload_params, storage_path, access_key, secret_key, bucket_name): + super(_CloudStorage, self).__init__() + + self.automatic_chunk_size = 5 * 1024 * 1024 + self._initialized = False self._bucket_name = bucket_name self._access_key = access_key @@ -136,12 +149,9 @@ class _CloudStorage(BaseStorage): raise IOError('No such key: \'{0}\''.format(path)) return StreamReadKeyAsFile(key) - def stream_write(self, path, fp, content_type=None, content_encoding=None): + def __initiate_multipart_upload(self, path, content_type, content_encoding): # Minimum size of upload part size on S3 is 5MB self._initialize_cloud_conn() - buffer_size = 5 * 1024 * 1024 - if self.buffer_size > buffer_size: - buffer_size = self.buffer_size path = self._init_path(path) metadata = {} @@ -151,26 +161,49 @@ class _CloudStorage(BaseStorage): if content_encoding is not None: metadata['Content-Encoding'] = content_encoding - mp = self._cloud_bucket.initiate_multipart_upload(path, metadata=metadata, - **self._upload_params) app.metric_queue.put('MultipartUploadStart', 1) + return self._cloud_bucket.initiate_multipart_upload(path, metadata=metadata, + **self._upload_params) + + def stream_write(self, path, fp, content_type=None, content_encoding=None): + return self._stream_write_internal(path, fp, content_type, content_encoding) + + def _stream_write_internal(self, path, fp, content_type=None, content_encoding=None, + cancel_on_error=True, size=filelike.READ_UNTIL_END): + mp = self.__initiate_multipart_upload(path, content_type, content_encoding) + + # We are going to reuse this but be VERY careful to only read the number of bytes written to it + buf = StringIO.StringIO() + num_part = 1 - while True: + total_bytes_written = 0 + while size == filelike.READ_UNTIL_END or total_bytes_written < size: + bytes_to_copy = self.automatic_chunk_size + if size != filelike.READ_UNTIL_END: + # We never want to ask for more bytes than our caller has indicated to copy + bytes_to_copy = min(bytes_to_copy, size - total_bytes_written) + + buf.seek(0) try: - buf = fp.read(buffer_size) - if not buf: + # Stage the bytes into the buffer for use with the multipart upload file API + bytes_staged = self.stream_write_to_fp(fp, buf, bytes_to_copy) + if bytes_staged == 0: break - io = StringIO.StringIO(buf) - mp.upload_part_from_file(io, num_part) + + buf.seek(0) + mp.upload_part_from_file(buf, num_part, size=bytes_staged) + total_bytes_written += bytes_staged num_part += 1 - io.close() except IOError: 
app.metric_queue.put('MultipartUploadFailure', 1) - mp.cancel_upload() - raise + if cancel_on_error: + mp.cancel_upload() + return 0 - app.metric_queue.put('MultipartUploadSuccess', 1) - mp.complete_upload() + if total_bytes_written > 0: + app.metric_queue.put('MultipartUploadSuccess', 1) + mp.complete_upload() + return total_bytes_written def list_directory(self, path=None): self._initialize_cloud_conn() @@ -248,6 +281,119 @@ class _CloudStorage(BaseStorage): with self.stream_read_file(path) as fp: destination.stream_write(path, fp) + def _rel_upload_path(self, uuid): + return 'uploads/{0}'.format(uuid) + + def initiate_chunked_upload(self): + self._initialize_cloud_conn() + random_uuid = str(uuid4()) + + metadata = { + _CHUNKS_KEY: [], + } + + return random_uuid, metadata + + def stream_upload_chunk(self, uuid, offset, length, in_fp, storage_metadata): + self._initialize_cloud_conn() + + # We are going to upload each chunk to a separate key + chunk_path = self._rel_upload_path(str(uuid4())) + bytes_written = self._stream_write_internal(chunk_path, in_fp, cancel_on_error=False, + size=length) + + new_metadata = copy.deepcopy(storage_metadata) + + # We are only going to track keys to which data was confirmed written + if bytes_written > 0: + new_metadata[_CHUNKS_KEY].append(_PartUploadMetadata(chunk_path, offset, bytes_written)) + + return bytes_written, new_metadata + + def _chunk_generator(self, chunk_list): + for chunk in chunk_list: + yield filelike.StreamSlice(self.stream_read_file(chunk.path), 0, chunk.length) + + @staticmethod + def _chunk_list_from_metadata(storage_metadata): + return [_PartUploadMetadata(*chunk_args) for chunk_args in storage_metadata[_CHUNKS_KEY]] + + def _client_side_chunk_join(self, final_path, chunk_list): + # If there's only one chunk, just "move" (copy and delete) the key and call it a day. + if len(chunk_list) == 1: + chunk_path = chunk_list[0].path + # Let the copy raise an exception if it fails. + self._cloud_bucket.copy_key(final_path, self._bucket_name, chunk_path) + + # Attempt to clean up the old chunk. + try: + self._cloud_bucket.delete_key(chunk_path) + except IOError: + # We failed to delete a chunk. This sucks, but we shouldn't fail the push. + msg = 'Failed to clean up chunk %s for move of %s' + logger.exception(msg, chunk_path, final_path) + else: + # Concatenate and write all the chunks as one key. + concatenated = filelike.FilelikeStreamConcat(self._chunk_generator(chunk_list)) + self.stream_write(final_path, concatenated) + + # Attempt to clean up all the chunks. + for chunk in chunk_list: + try: + self._cloud_bucket.delete_key(chunk.path) + except IOError: + # We failed to delete a chunk. This sucks, but we shouldn't fail the push. 
+ msg = 'Failed to clean up chunk %s for reupload of %s' + logger.exception(msg, chunk.path, final_path) + + def complete_chunked_upload(self, uuid, final_path, storage_metadata): + self._initialize_cloud_conn() + + # Here is where things get interesting: we are going to try to assemble this server side + # In order to be a candidate all parts (after offsets have been computed) must be at least 5MB + server_side_assembly = True + chunk_list = self._chunk_list_from_metadata(storage_metadata) + for chunk_offset, chunk in enumerate(chunk_list): + # If the chunk is both too small, and not the last chunk, we rule out server side assembly + if chunk.length < self.automatic_chunk_size and (chunk_offset + 1) < len(chunk_list): + server_side_assembly = False + break + + if server_side_assembly: + logger.debug('Performing server side assembly of multi-part upload for: %s', final_path) + try: + # Awesome, we can do this completely server side, now we have to start a new multipart + # upload and use copy_part_from_key to set all of the chunks. + mpu = self.__initiate_multipart_upload(final_path, content_type=None, content_encoding=None) + + for chunk_offset, chunk in enumerate(chunk_list): + abs_chunk_path = self._init_path(chunk.path) + part_num = chunk_offset + 1 + chunk_end_offset_inclusive = chunk.length - 1 + mpu.copy_part_from_key(self.get_cloud_bucket().name, abs_chunk_path, part_num, + start=0, end=chunk_end_offset_inclusive) + mpu.complete_upload() + + except IOError as ioe: + # Something bad happened, log it and then give up + msg = 'Exception when attempting server-side assembly for: %s' + logger.exception(msg, final_path) + mpu.cancel_upload() + raise ioe + + else: + # We are going to turn all of the server side objects into a single file-like stream, and + # pass that to stream_write to chunk and upload the final object. + self._client_side_chunk_join(final_path, chunk_list) + + + def cancel_chunked_upload(self, uuid, storage_metadata): + self._initialize_cloud_conn() + + # We have to go through and delete all of the uploaded chunks + for chunk in self._chunk_list_from_metadata(storage_metadata): + self.remove(chunk.path) + class S3Storage(_CloudStorage): def __init__(self, storage_path, s3_access_key, s3_secret_key, s3_bucket): @@ -304,7 +450,8 @@ class GoogleCloudStorage(_CloudStorage): """) - def stream_write(self, path, fp, content_type=None, content_encoding=None): + def _stream_write_internal(self, path, fp, content_type=None, content_encoding=None, + cancel_on_error=True, size=filelike.READ_UNTIL_END): # Minimum size of upload part size on S3 is 5MB self._initialize_cloud_conn() path = self._init_path(path) @@ -316,8 +463,23 @@ class GoogleCloudStorage(_CloudStorage): if content_encoding is not None: key.set_metadata('Content-Encoding', content_encoding) + if size != filelike.READ_UNTIL_END: + fp = filelike.StreamSlice(fp, 0, size) + + # TODO figure out how to handle cancel_on_error=False key.set_contents_from_stream(fp) + return key.size + + def complete_chunked_upload(self, uuid, final_path, storage_metadata): + self._initialize_cloud_conn() + + # Boto does not support GCS's multipart upload API because it differs from S3, so + # we are forced to join it all locally and then reupload. 
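# The server-side assembly rule used by complete_chunked_upload() above, in isolation
# (not part of this change): S3 multipart copy requires every part except the last to
# meet the minimum part size, otherwise the chunks are re-joined client side.
AUTOMATIC_CHUNK_SIZE = 5 * 1024 * 1024

def can_assemble_server_side(chunk_lengths):
  for index, length in enumerate(chunk_lengths):
    is_last = (index + 1) == len(chunk_lengths)
    if length < AUTOMATIC_CHUNK_SIZE and not is_last:
      return False
  return True

assert can_assemble_server_side([5 * 1024 * 1024, 123])
assert not can_assemble_server_side([123, 5 * 1024 * 1024])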
+ # See https://github.com/boto/boto/issues/3355 + chunk_list = self._chunk_list_from_metadata(storage_metadata) + self._client_side_chunk_join(final_path, chunk_list) + class RadosGWStorage(_CloudStorage): def __init__(self, hostname, is_secure, storage_path, access_key, secret_key, bucket_name): @@ -344,3 +506,12 @@ class RadosGWStorage(_CloudStorage): return None return super(RadosGWStorage, self).get_direct_upload_url(path, mime_type, requires_cors) + + def complete_chunked_upload(self, uuid, final_path, storage_metadata): + self._initialize_cloud_conn() + + # RadosGW does not support multipart copying from keys, so we are forced to join + # it all locally and then reupload. + # See https://github.com/ceph/ceph/pull/5139 + chunk_list = self._chunk_list_from_metadata(storage_metadata) + self._client_side_chunk_join(final_path, chunk_list) diff --git a/storage/distributedstorage.py b/storage/distributedstorage.py index 55afec293..3e94a3dd4 100644 --- a/storage/distributedstorage.py +++ b/storage/distributedstorage.py @@ -53,10 +53,10 @@ class DistributedStorage(StoragePaths): initiate_chunked_upload = _location_aware(BaseStorageV2.initiate_chunked_upload) stream_upload_chunk = _location_aware(BaseStorageV2.stream_upload_chunk) complete_chunked_upload = _location_aware(BaseStorageV2.complete_chunked_upload) + cancel_chunked_upload = _location_aware(BaseStorageV2.cancel_chunked_upload) def copy_between(self, path, source_location, destination_location): """ Copies a file between the source location and the destination location. """ source_storage = self._storages[source_location] destination_storage = self._storages[destination_location] source_storage.copy_to(destination_storage, path) - diff --git a/storage/fakestorage.py b/storage/fakestorage.py index b4f27be32..658a88a86 100644 --- a/storage/fakestorage.py +++ b/storage/fakestorage.py @@ -1,9 +1,14 @@ -from storage.basestorage import BaseStorage -from cStringIO import StringIO +import cStringIO as StringIO +import hashlib -_FAKE_STORAGE_MAP = {} +from collections import defaultdict +from uuid import uuid4 -class FakeStorage(BaseStorage): +from storage.basestorage import BaseStorageV2 + +_FAKE_STORAGE_MAP = defaultdict(StringIO.StringIO) + +class FakeStorage(BaseStorageV2): def _init_path(self, path=None, create=False): return path @@ -11,19 +16,29 @@ class FakeStorage(BaseStorage): if not path in _FAKE_STORAGE_MAP: raise IOError('Fake file %s not found' % path) - return _FAKE_STORAGE_MAP.get(path) + _FAKE_STORAGE_MAP.get(path).seek(0) + return _FAKE_STORAGE_MAP.get(path).read() def put_content(self, path, content): - _FAKE_STORAGE_MAP[path] = content + _FAKE_STORAGE_MAP.pop(path, None) + _FAKE_STORAGE_MAP[path].write(content) def stream_read(self, path): - yield _FAKE_STORAGE_MAP[path] + io_obj = _FAKE_STORAGE_MAP[path] + io_obj.seek(0) + while True: + buf = io_obj.read(self.buffer_size) + if not buf: + break + yield buf def stream_read_file(self, path): return StringIO(_FAKE_STORAGE_MAP[path]) def stream_write(self, path, fp, content_type=None, content_encoding=None): - _FAKE_STORAGE_MAP[path] = fp.read() + out_fp = _FAKE_STORAGE_MAP[path] + out_fp.seek(0) + self.stream_write_to_fp(fp, out_fp) def remove(self, path): _FAKE_STORAGE_MAP.pop(path, None) @@ -32,4 +47,21 @@ class FakeStorage(BaseStorage): return path in _FAKE_STORAGE_MAP def get_checksum(self, path): - return path + return hashlib.sha256(_FAKE_STORAGE_MAP[path].read()).hexdigest()[:7] + + def initiate_chunked_upload(self): + new_uuid = str(uuid4()) + 
_FAKE_STORAGE_MAP[new_uuid].seek(0) + return new_uuid, {} + + def stream_upload_chunk(self, uuid, offset, length, in_fp, _): + upload_storage = _FAKE_STORAGE_MAP[uuid] + upload_storage.seek(offset) + return self.stream_write_to_fp(in_fp, upload_storage, length), {} + + def complete_chunked_upload(self, uuid, final_path, _): + _FAKE_STORAGE_MAP[final_path] = _FAKE_STORAGE_MAP[uuid] + _FAKE_STORAGE_MAP.pop(uuid, None) + + def cancel_chunked_upload(self, uuid, _): + _FAKE_STORAGE_MAP.pop(uuid, None) diff --git a/storage/local.py b/storage/local.py index 333c9724d..4c5873c92 100644 --- a/storage/local.py +++ b/storage/local.py @@ -8,15 +8,14 @@ import psutil from uuid import uuid4 from storage.basestorage import BaseStorageV2 -from digest import digest_tools logger = logging.getLogger(__name__) class LocalStorage(BaseStorageV2): - def __init__(self, storage_path): + super(LocalStorage, self).__init__() self._root_path = storage_path def _init_path(self, path=None, create=False): @@ -55,25 +54,7 @@ class LocalStorage(BaseStorageV2): # Size is mandatory path = self._init_path(path, create=True) with open(path, mode='wb') as out_fp: - self._stream_write_to_fp(fp, out_fp) - - def _stream_write_to_fp(self, in_fp, out_fp, num_bytes=-1): - """ Copy the specified number of bytes from the input file stream to the output stream. If - num_bytes < 0 copy until the stream ends. - """ - bytes_copied = 0 - bytes_remaining = num_bytes - while bytes_remaining > 0 or num_bytes < 0: - try: - buf = in_fp.read(self.buffer_size) - if not buf: - break - out_fp.write(buf) - bytes_copied += len(buf) - except IOError: - break - - return bytes_copied + self.stream_write_to_fp(fp, out_fp) def list_directory(self, path=None): path = self._init_path(path) @@ -122,21 +103,15 @@ class LocalStorage(BaseStorageV2): with open(self._init_path(self._rel_upload_path(new_uuid), create=True), 'w'): pass - return new_uuid + return new_uuid, {} - def stream_upload_chunk(self, uuid, offset, length, in_fp): + def stream_upload_chunk(self, uuid, offset, length, in_fp, _): with open(self._init_path(self._rel_upload_path(uuid)), 'r+b') as upload_storage: upload_storage.seek(offset) - return self._stream_write_to_fp(in_fp, upload_storage, length) + return self.stream_write_to_fp(in_fp, upload_storage, length), {} - def complete_chunked_upload(self, uuid, final_path, digest_to_verify): + def complete_chunked_upload(self, uuid, final_path, _): content_path = self._rel_upload_path(uuid) - content_digest = digest_tools.sha256_digest_from_generator(self.stream_read(content_path)) - - if not digest_tools.digests_equal(content_digest, digest_to_verify): - msg = 'Given: {0} Computed: {1}'.format(digest_to_verify, content_digest) - raise digest_tools.InvalidDigestException(msg) - final_path_abs = self._init_path(final_path, create=True) if not self.exists(final_path_abs): logger.debug('Moving content into place at path: %s', final_path_abs) @@ -144,6 +119,10 @@ class LocalStorage(BaseStorageV2): else: logger.debug('Content already exists at path: %s', final_path_abs) + def cancel_chunked_upload(self, uuid, _): + content_path = self._init_path(self._rel_upload_path(uuid)) + os.remove(content_path) + def validate(self, client): # Load the set of disk mounts. try: diff --git a/storage/swift.py b/storage/swift.py index 928715fb4..8270b2060 100644 --- a/storage/swift.py +++ b/storage/swift.py @@ -1,4 +1,7 @@ -""" Swift storage driver. Based on: github.com/bacongobbler/docker-registry-driver-swift/ """ +""" Swift storage driver. 
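# End-to-end sketch (not part of this change) of the chunked-upload storage contract
# exercised against the in-memory FakeStorage above; the final path is arbitrary.
import cStringIO as StringIO

storage = FakeStorage()
upload_uuid, metadata = storage.initiate_chunked_upload()

written, metadata = storage.stream_upload_chunk(upload_uuid, 0, 5,
                                                StringIO.StringIO('hello'), metadata)
assert written == 5

storage.complete_chunked_upload(upload_uuid, 'blobs/sha256/ab/abcdef', metadata)
assert storage.get_content('blobs/sha256/ab/abcdef') == 'hello'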
+ + Uses: http://docs.openstack.org/developer/swift/overview_large_objects.html +""" from swiftclient.client import Connection, ClientException from storage.basestorage import BaseStorage from util.registry.generatorfile import GeneratorFile @@ -6,18 +9,27 @@ from urlparse import urlparse from random import SystemRandom from hashlib import sha1 from time import time +from collections import namedtuple +from util.registry import filelike +import copy import hmac import string import logging +from uuid import uuid4 logger = logging.getLogger(__name__) +_PartUploadMetadata = namedtuple('_PartUploadMetadata', ['path', 'offset', 'length']) +_SEGMENTS_KEY = 'segments' +_SEGMENT_DIRECTORY = 'segments' +_MAXIMUM_SEGMENT_SIZE = 5000000000 # 5 GB class SwiftStorage(BaseStorage): def __init__(self, swift_container, storage_path, auth_url, swift_user, swift_password, auth_version=None, os_options=None, ca_cert_path=None, temp_url_key=None): + super(SwiftStorage, self).__init__() self._swift_container = swift_container self._storage_path = storage_path @@ -100,9 +112,10 @@ class SwiftStorage(BaseStorage): logger.exception('Could not get object: %s', path) raise IOError('Path %s not found' % path) - def _put_object(self, path, content, chunk=None, content_type=None, content_encoding=None): + def _put_object(self, path, content, chunk=None, content_type=None, content_encoding=None, + headers=None): path = self._normalize_path(path) - headers = {} + headers = headers or {} if content_encoding is not None: headers['Content-Encoding'] = content_encoding @@ -241,3 +254,79 @@ class SwiftStorage(BaseStorage): raise IOError('Cannot lookup path: %s' % path) return headers.get('etag', '')[1:-1][:7] or self._random_checksum(7) + + @staticmethod + def _segment_list_from_metadata(storage_metadata): + return [_PartUploadMetadata(*segment_args) for segment_args in storage_metadata[_SEGMENTS_KEY]] + + def initiate_chunked_upload(self): + random_uuid = str(uuid4()) + + metadata = { + _SEGMENTS_KEY: [], + } + + return random_uuid, metadata + + def stream_upload_chunk(self, uuid, offset, length, in_fp, storage_metadata): + if length == 0: + return 0, storage_metadata + + # Note: Swift limits segments to a maximum of 5GB, so we keep writing segments until we + # are finished hitting the data limit. + total_bytes_written = 0 + while True: + bytes_written, storage_metadata = self._stream_upload_segment(uuid, offset, length, in_fp, + storage_metadata) + + if length != filelike.READ_UNTIL_END: + length = length - bytes_written + + offset = offset + bytes_written + total_bytes_written = total_bytes_written + bytes_written + if bytes_written == 0 or length <= 0: + return total_bytes_written, storage_metadata + + def _stream_upload_segment(self, uuid, offset, length, in_fp, storage_metadata): + updated_metadata = copy.deepcopy(storage_metadata) + segment_count = len(updated_metadata[_SEGMENTS_KEY]) + segment_path = '%s/%s/%s' % (_SEGMENT_DIRECTORY, uuid, segment_count) + + # Track the number of bytes read and if an explicit length is specified, limit the + # file stream to that length. + if length == filelike.READ_UNTIL_END: + length = _MAXIMUM_SEGMENT_SIZE + else: + length = min(_MAXIMUM_SEGMENT_SIZE, length) + + limiting_fp = filelike.LimitingStream(in_fp, length) + + # Write the segment to Swift. + self.stream_write(segment_path, limiting_fp) + + # We are only going to track keys to which data was confirmed written. 
+ bytes_written = limiting_fp.tell() + if bytes_written > 0: + updated_metadata[_SEGMENTS_KEY].append(_PartUploadMetadata(segment_path, offset, + bytes_written)) + + return bytes_written, updated_metadata + + def complete_chunked_upload(self, uuid, final_path, storage_metadata): + """ Complete the chunked upload and store the final results in the path indicated. + Returns nothing. + """ + # Finally, we write an empty file at the proper location with a X-Object-Manifest + # header pointing to the prefix for the segments. + segments_prefix_path = self._normalize_path('%s/%s/' % (_SEGMENT_DIRECTORY, uuid)) + contained_segments_prefix_path = '%s/%s' % (self._swift_container, segments_prefix_path) + + self._put_object(final_path, '', headers={'X-Object-Manifest': contained_segments_prefix_path}) + + def cancel_chunked_upload(self, uuid, storage_metadata): + """ Cancel the chunked upload and clean up any outstanding partially uploaded data. + Returns nothing. + """ + # Delete all the uploaded segments. + for segment in SwiftStorage._segment_list_from_metadata(storage_metadata): + self.remove(segment.path) diff --git a/test/data/test.db b/test/data/test.db index 5a438c4de..33cbdddfe 100644 Binary files a/test/data/test.db and b/test/data/test.db differ diff --git a/test/registry_tests.py b/test/registry_tests.py index dac59d9b1..43138965f 100644 --- a/test/registry_tests.py +++ b/test/registry_tests.py @@ -1,6 +1,11 @@ import unittest import requests +import os +import math +import random +import string +import Crypto.Random from flask import request, jsonify from flask.blueprints import Blueprint from flask.ext.testing import LiveServerTestCase @@ -8,23 +13,33 @@ from flask.ext.testing import LiveServerTestCase from app import app from data.database import close_db_filter, configure from endpoints.v1 import v1_bp +from endpoints.v2 import v2_bp +from endpoints.v2.manifest import SignedManifestBuilder from endpoints.api import api_bp from initdb import wipe_database, initialize_database, populate_database from endpoints.csrf import generate_csrf_token from tempfile import NamedTemporaryFile +from jsonschema import validate as validate_schema import endpoints.decorated import json import features +import hashlib +import logging import tarfile import shutil +from jwkest.jws import SIGNER_ALGS +from jwkest.jwk import RSAKey +from Crypto.PublicKey import RSA + from cStringIO import StringIO from digest.checksums import compute_simple try: app.register_blueprint(v1_bp, url_prefix='/v1') + app.register_blueprint(v2_bp, url_prefix='/v2') app.register_blueprint(api_bp, url_prefix='/api') except ValueError: # Blueprint was already registered @@ -35,6 +50,8 @@ except ValueError: # DB connection. testbp = Blueprint('testbp', __name__) +logger = logging.getLogger(__name__) + @testbp.route('/csrf', methods=['GET']) def generate_csrf(): @@ -47,13 +64,16 @@ def set_feature(feature_name): features._FEATURES[feature_name].value = request.get_json()['value'] return jsonify({'old_value': old_value}) -@testbp.route('/reloaddb', methods=['POST']) -def reload_db(): +@testbp.route('/reloadapp', methods=['POST']) +def reload_app(): # Close any existing connection. close_db_filter(None) # Reload the database config. configure(app.config) + + # Reload random after the process split, as it cannot be used uninitialized across forks. 
+ Crypto.Random.atfork() return 'OK' app.register_blueprint(testbp, url_prefix='/__test') @@ -82,8 +102,11 @@ class TestFeature(object): data=json.dumps(dict(value=self.old_value)), headers={'Content-Type': 'application/json'}) + _PORT_NUMBER = 5001 _CLEAN_DATABASE_PATH = None +_JWK = RSAKey(key=RSA.generate(2048)) + def get_new_database_uri(): # If a clean copy of the database has not yet been created, create one now. @@ -106,12 +129,15 @@ def get_new_database_uri(): shutil.copy2(_CLEAN_DATABASE_PATH, local_db_file.name) return 'sqlite:///{0}'.format(local_db_file.name) -class RegistryTestCase(LiveServerTestCase): - maxDiff = None +class RegistryTestCaseMixin(LiveServerTestCase): def create_app(self): global _PORT_NUMBER _PORT_NUMBER = _PORT_NUMBER + 1 + + if os.environ.get('DEBUG') == 'true': + app.config['DEBUG'] = True + app.config['TESTING'] = True app.config['LIVESERVER_PORT'] = _PORT_NUMBER app.config['DB_URI'] = get_new_database_uri() @@ -120,134 +146,25 @@ class RegistryTestCase(LiveServerTestCase): def setUp(self): self.clearSession() - # Tell the remote running app to reload the database. By default, the app forks from the + # Tell the remote running app to reload the database and app. By default, the app forks from the # current context and has already loaded the DB config with the *original* DB URL. We call # the remote reload method to force it to pick up the changes to DB_URI set in the create_app # method. - self.conduct('POST', '/__test/reloaddb') + self.conduct('POST', '/__test/reloadapp') def clearSession(self): self.session = requests.Session() self.signature = None self.docker_token = 'true' + self.jwt = None # Load the CSRF token. self.csrf_token = '' self.csrf_token = self.conduct('GET', '/__test/csrf').text - def conduct(self, method, url, headers=None, data=None, auth=None, expected_code=200): - headers = headers or {} - headers['X-Docker-Token'] = self.docker_token - - if self.signature and not auth: - headers['Authorization'] = 'token ' + self.signature - - response = self.session.request(method, self.get_server_url() + url, headers=headers, data=data, - auth=auth, params=dict(_csrf_token=self.csrf_token)) - if response.status_code != expected_code: - print response.text - - if 'www-authenticate' in response.headers: - self.signature = response.headers['www-authenticate'] - - if 'X-Docker-Token' in response.headers: - self.docker_token = response.headers['X-Docker-Token'] - - self.assertEquals(response.status_code, expected_code) - return response - - def ping(self): - self.conduct('GET', '/v1/_ping') - - def do_login(self, username, password='password'): - self.ping() - result = self.conduct('POST', '/v1/users/', - data=json.dumps(dict(username=username, password=password, - email='bar@example.com')), - headers={"Content-Type": "application/json"}, - expected_code=400) - - self.assertEquals(result.text, '"Username or email already exists"') - self.conduct('GET', '/v1/users/', auth=(username, password)) - - def do_push(self, namespace, repository, username, password, images, expected_code=201): - auth = (username, password) - - # Ping! 
- self.ping() - - # PUT /v1/repositories/{namespace}/{repository}/ - data = [{"id": image['id']} for image in images] - self.conduct('PUT', '/v1/repositories/%s/%s' % (namespace, repository), - data=json.dumps(data), auth=auth, - expected_code=expected_code) - - if expected_code != 201: - return - - for image in images: - # PUT /v1/images/{imageID}/json - self.conduct('PUT', '/v1/images/%s/json' % image['id'], data=json.dumps(image)) - - # PUT /v1/images/{imageID}/layer - tar_file_info = tarfile.TarInfo(name='image_name') - tar_file_info.type = tarfile.REGTYPE - tar_file_info.size = len(image['id']) - - layer_data = StringIO() - - tar_file = tarfile.open(fileobj=layer_data, mode='w|gz') - tar_file.addfile(tar_file_info, StringIO(image['id'])) - tar_file.close() - - layer_bytes = layer_data.getvalue() - layer_data.close() - - self.conduct('PUT', '/v1/images/%s/layer' % image['id'], data=StringIO(layer_bytes)) - - # PUT /v1/images/{imageID}/checksum - checksum = compute_simple(StringIO(layer_bytes), json.dumps(image)) - self.conduct('PUT', '/v1/images/%s/checksum' % image['id'], - headers={'X-Docker-Checksum-Payload': checksum}) - - - # PUT /v1/repositories/{namespace}/{repository}/tags/latest - self.do_tag(namespace, repository, 'latest', images[0]['id']) - - # PUT /v1/repositories/{namespace}/{repository}/images - self.conduct('PUT', '/v1/repositories/%s/%s/images' % (namespace, repository), - expected_code=204) - - - def do_pull(self, namespace, repository, username=None, password='password', expected_code=200): - auth = None - if username: - auth = (username, password) - - # Ping! - self.ping() - - prefix = '/v1/repositories/%s/%s/' % (namespace, repository) - - # GET /v1/repositories/{namespace}/{repository}/ - self.conduct('GET', prefix + 'images', auth=auth, expected_code=expected_code) - if expected_code != 200: - # Push was expected to fail, so nothing more to do for the push. - return - - # GET /v1/repositories/{namespace}/{repository}/ - result = json.loads(self.conduct('GET', prefix + 'tags').text) - - for image_id in result.values(): - # /v1/images/{imageID}/{ancestry, json, layer} - image_prefix = '/v1/images/%s/' % image_id - self.conduct('GET', image_prefix + 'ancestry') - self.conduct('GET', image_prefix + 'json') - self.conduct('GET', image_prefix + 'layer') - def do_tag(self, namespace, repository, tag, image_id, expected_code=200): self.conduct('PUT', '/v1/repositories/%s/%s/tags/%s' % (namespace, repository, tag), - data='"%s"' % image_id, expected_code=expected_code) + data='"%s"' % image_id, expected_code=expected_code, auth='sig') def conduct_api_login(self, username, password): self.conduct('POST', '/api/v1/signin', @@ -260,20 +177,367 @@ class RegistryTestCase(LiveServerTestCase): headers={'Content-Type': 'application/json'}) -class RegistryTests(RegistryTestCase): - def test_push_reponame_with_slashes(self): - # Attempt to add a repository name with slashes. This should fail as we do not support it. 
- images = [{ - 'id': 'onlyimagehere' - }] - self.do_push('public', 'newrepo/somesubrepo', 'public', 'password', images, expected_code=400) +class BaseRegistryMixin(object): + def conduct(self, method, url, headers=None, data=None, auth=None, params=None, expected_code=200): + params = params or {} + params['_csrf_token'] = self.csrf_token + headers = headers or {} + auth_tuple = None + + if self.docker_token: + headers['X-Docker-Token'] = self.docker_token + + if auth == 'sig': + if self.signature: + headers['Authorization'] = 'token ' + self.signature + elif auth == 'jwt': + if self.jwt: + headers['Authorization'] = 'Bearer ' + self.jwt + elif auth: + auth_tuple = auth + + response = self.session.request(method, self.get_server_url() + url, headers=headers, data=data, + auth=auth_tuple, params=params) + if response.status_code != expected_code: + print response.text + + if 'www-authenticate' in response.headers: + self.signature = response.headers['www-authenticate'] + + if 'X-Docker-Token' in response.headers: + self.docker_token = response.headers['X-Docker-Token'] + + self.assertEquals(response.status_code, expected_code) + return response + + def _get_default_images(self): + return [{'id': 'someid', 'contents': 'somecontent'}] + + +class V1RegistryMixin(BaseRegistryMixin): + def v1_ping(self): + self.conduct('GET', '/v1/_ping') + + +class V1RegistryPushMixin(V1RegistryMixin): + def do_push(self, namespace, repository, username, password, images=None, expected_code=201): + images = images or self._get_default_images() + auth = (username, password) + + # Ping! + self.v1_ping() + + # PUT /v1/repositories/{namespace}/{repository}/ + self.conduct('PUT', '/v1/repositories/%s/%s' % (namespace, repository), + data=json.dumps(images), auth=auth, + expected_code=expected_code) + + if expected_code != 201: + return + + last_image_id = None + for image_data in images: + image_id = image_data['id'] + last_image_id = image_id + + # PUT /v1/images/{imageID}/json + self.conduct('PUT', '/v1/images/%s/json' % image_id, + data=json.dumps({'id': image_id}), auth='sig') + + # PUT /v1/images/{imageID}/layer + tar_file_info = tarfile.TarInfo(name='image_name') + tar_file_info.type = tarfile.REGTYPE + tar_file_info.size = len(image_id) + + layer_data = StringIO() + + tar_file = tarfile.open(fileobj=layer_data, mode='w|gz') + tar_file.addfile(tar_file_info, StringIO(image_id)) + tar_file.close() + + layer_bytes = layer_data.getvalue() + layer_data.close() + + self.conduct('PUT', '/v1/images/%s/layer' % image_id, + data=StringIO(layer_bytes), auth='sig') + + # PUT /v1/images/{imageID}/checksum + checksum = compute_simple(StringIO(layer_bytes), json.dumps({'id': image_id})) + self.conduct('PUT', '/v1/images/%s/checksum' % image_id, + headers={'X-Docker-Checksum-Payload': checksum}, + auth='sig') + + + # PUT /v1/repositories/{namespace}/{repository}/tags/latest + self.do_tag(namespace, repository, 'latest', images[0]['id']) + + # PUT /v1/repositories/{namespace}/{repository}/images + self.conduct('PUT', '/v1/repositories/%s/%s/images' % (namespace, repository), + expected_code=204, + auth='sig') + + +class V1RegistryPullMixin(V1RegistryMixin): + def do_pull(self, namespace, repository, username=None, password='password', expected_code=200, + images=None): + images = images or self._get_default_images() + + auth = None + if username: + auth = (username, password) + + # Ping! 
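As a side note (not part of the patch), the in-memory layer tarball that do_push builds round-trips cleanly; the snippet below is just that tarfile/StringIO pattern in isolation, with a read-back check added for illustration.

import tarfile
from cStringIO import StringIO

payload = 'someid'
tar_file_info = tarfile.TarInfo(name='image_name')
tar_file_info.type = tarfile.REGTYPE
tar_file_info.size = len(payload)

layer_data = StringIO()
tar_file = tarfile.open(fileobj=layer_data, mode='w|gz')
tar_file.addfile(tar_file_info, StringIO(payload))
tar_file.close()

# Reading the archive back yields the single member with the original contents.
reader = tarfile.open(fileobj=StringIO(layer_data.getvalue()), mode='r:gz')
member = reader.getmembers()[0]
assert member.name == 'image_name'
assert reader.extractfile(member).read() == payload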
+ self.v1_ping() + + prefix = '/v1/repositories/%s/%s/' % (namespace, repository) + + # GET /v1/repositories/{namespace}/{repository}/ + self.conduct('GET', prefix + 'images', auth=auth, expected_code=expected_code) + if expected_code != 200: + return + + # GET /v1/repositories/{namespace}/{repository}/ + result = json.loads(self.conduct('GET', prefix + 'tags', auth='sig').text) + + self.assertEquals(len(images), len(result.values())) + + for image_data in images: + image_id = image_data['id'] + self.assertIn(image_id, result.values()) + + # /v1/images/{imageID}/{ancestry, json, layer} + image_prefix = '/v1/images/%s/' % image_id + self.conduct('GET', image_prefix + 'ancestry', auth='sig') + self.conduct('GET', image_prefix + 'json', auth='sig') + self.conduct('GET', image_prefix + 'layer', auth='sig') + + + +class V2RegistryMixin(BaseRegistryMixin): + MANIFEST_SCHEMA = { + 'type': 'object', + 'properties': { + 'name': { + 'type': 'string', + }, + 'tag': { + 'type': 'string', + }, + 'signatures': { + 'type': 'array', + 'itemType': { + 'type': 'object', + }, + }, + 'fsLayers': { + 'type': 'array', + 'itemType': { + 'type': 'object', + 'properties': { + 'blobSum': { + 'type': 'string', + }, + }, + 'required': 'blobSum', + }, + }, + 'history': { + 'type': 'array', + 'itemType': { + 'type': 'object', + 'properties': { + 'v1Compatibility': { + 'type': 'object', + }, + }, + 'required': ['v1Compatibility'], + }, + }, + }, + 'required': ['name', 'tag', 'fsLayers', 'history', 'signatures'], + } + + def v2_ping(self): + response = self.conduct('GET', '/v2/', expected_code=200 if self.jwt else 401, auth='jwt') + self.assertEquals(response.headers['Docker-Distribution-API-Version'], 'registry/2.0') + + + def do_auth(self, username, password, namespace, repository, expected_code=200, scopes=[]): + auth = (username, password) + params = { + 'account': username, + 'scope': 'repository:%s/%s:%s' % (namespace, repository, ','.join(scopes)), + 'service': app.config['SERVER_HOSTNAME'], + } + + response = self.conduct('GET', '/v2/auth', params=params, auth=(username, password), + expected_code=expected_code) + + if expected_code == 200: + response_json = json.loads(response.text) + self.assertIsNotNone(response_json.get('token')) + self.jwt = response_json['token'] + + return response + + +class V2RegistryPushMixin(V2RegistryMixin): + def do_push(self, namespace, repository, username, password, images=None, tag_name=None, + cancel=False, invalid=False, expected_manifest_code=202, expected_auth_code=200): + images = images or self._get_default_images() + + # Ping! + self.v2_ping() + + # Auth. + self.do_auth(username, password, namespace, repository, scopes=['push', 'pull'], + expected_code=expected_auth_code) + + if expected_auth_code != 200: + return + + # Build a fake manifest. + tag_name = tag_name or 'latest' + builder = SignedManifestBuilder(namespace, repository, tag_name) + for image_data in images: + checksum = 'sha256:' + hashlib.sha256(image_data['contents']).hexdigest() + if invalid: + checksum = 'sha256:' + hashlib.sha256('foobarbaz').hexdigest() + + builder.add_layer(checksum, json.dumps(image_data)) + + # Build the manifest. + manifest = builder.build(_JWK) + + # Push the image's layers. + checksums = {} + for image_data in images: + image_id = image_data['id'] + full_contents = image_data['contents'] + chunks = image_data.get('chunks') + + # Layer data should not yet exist. 
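For reference (not part of the patch), the checksum strings used throughout these V2 tests are plain content digests in the 'algorithm:hex' form that the registry echoes back in Docker-Content-Digest. A minimal sketch:

import hashlib

def content_digest(data):
  """ Return the 'sha256:<hex>' digest string for the given raw bytes. """
  return 'sha256:' + hashlib.sha256(data).hexdigest()

digest = content_digest('somecontent')
assert digest.startswith('sha256:')
assert len(digest) == len('sha256:') + 64  # 32 bytes of SHA-256, hex encoded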
+ checksum = 'sha256:' + hashlib.sha256(full_contents).hexdigest() + self.conduct('HEAD', '/v2/%s/%s/blobs/%s' % (namespace, repository, checksum), + expected_code=404, auth='jwt') + + # Start a new upload of the layer data. + response = self.conduct('POST', '/v2/%s/%s/blobs/uploads/' % (namespace, repository), + expected_code=202, auth='jwt') + + upload_uuid = response.headers['Docker-Upload-UUID'] + location = response.headers['Location'][len(self.get_server_url()):] + + # PATCH the image data into the layer. + if chunks is None: + self.conduct('PATCH', location, data=full_contents, expected_code=204, auth='jwt') + else: + for chunk in chunks: + if len(chunk) == 3: + (start_byte, end_byte, expected_code) = chunk + else: + (start_byte, end_byte) = chunk + expected_code = 204 + + contents_chunk = full_contents[start_byte:end_byte] + self.conduct('PATCH', location, data=contents_chunk, expected_code=expected_code, auth='jwt', + headers={'Range': 'bytes=%s-%s' % (start_byte, end_byte)}) + + if expected_code != 204: + return + + # Retrieve the upload status at each point. + status_url = '/v2/%s/%s/blobs/uploads/%s' % (namespace, repository, upload_uuid) + response = self.conduct('GET', status_url, expected_code=204, auth='jwt', + headers=dict(host=self.get_server_url())) + self.assertEquals(response.headers['Docker-Upload-UUID'], upload_uuid) + self.assertEquals(response.headers['Range'], "bytes=0-%s" % end_byte) + + if cancel: + self.conduct('DELETE', location, params=dict(digest=checksum), expected_code=204, + auth='jwt') + + # Ensure the upload was canceled. + status_url = '/v2/%s/%s/blobs/uploads/%s' % (namespace, repository, upload_uuid) + self.conduct('GET', status_url, expected_code=404, auth='jwt', + headers=dict(host=self.get_server_url())) + return + + # Finish the layer upload with a PUT. + response = self.conduct('PUT', location, params=dict(digest=checksum), expected_code=201, + auth='jwt') + + self.assertEquals(response.headers['Docker-Content-Digest'], checksum) + checksums[image_id] = checksum + + # Ensure the layer exists now. + response = self.conduct('HEAD', '/v2/%s/%s/blobs/%s' % (namespace, repository, checksum), + expected_code=200, auth='jwt') + self.assertEquals(response.headers['Docker-Content-Digest'], checksum) + self.assertEquals(response.headers['Content-Length'], str(len(full_contents))) + + # Write the manifest. + put_code = 404 if invalid else expected_manifest_code + self.conduct('PUT', '/v2/%s/%s/manifests/%s' % (namespace, repository, tag_name), + data=manifest.bytes, expected_code=put_code, + headers={'Content-Type': 'application/json'}, auth='jwt') + + return checksums, manifest.digest + + +class V2RegistryPullMixin(V2RegistryMixin): + def do_pull(self, namespace, repository, username=None, password='password', expected_code=200, + manifest_id=None, expected_manifest_code=200, images=None): + images = images or self._get_default_images() + + # Ping! + self.v2_ping() + + # Auth. + self.do_auth(username, password, namespace, repository, scopes=['pull'], + expected_code=expected_code) + if expected_code != 200: + return + + # Retrieve the manifest for the tag or digest. + manifest_id = manifest_id or 'latest' + response = self.conduct('GET', '/v2/%s/%s/manifests/%s' % (namespace, repository, manifest_id), + auth='jwt', expected_code=expected_manifest_code) + if expected_manifest_code != 200: + return + + manifest_data = json.loads(response.text) + + # Ensure the manifest returned by us is valid. 
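A small aside (not part of the patch) on the validate_schema call that follows: with the library's default draft-4 handling of a top-level 'required' list, jsonschema raises ValidationError when a required key is missing, which is all the pull path relies on here. A hedged sketch against the MANIFEST_SCHEMA defined above:

from jsonschema import validate, ValidationError

well_formed = {
  'name': 'devtable/newrepo',
  'tag': 'latest',
  'fsLayers': [{'blobSum': 'sha256:abc'}],
  'history': [{'v1Compatibility': '{"id": "someid"}'}],
  'signatures': [{}],
}

# All required keys are present, so this validates without raising.
validate(well_formed, V2RegistryMixin.MANIFEST_SCHEMA)

try:
  validate({'name': 'incomplete'}, V2RegistryMixin.MANIFEST_SCHEMA)
except ValidationError:
  pass  # 'tag', 'fsLayers', 'history' and 'signatures' are all required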
+ validate_schema(manifest_data, V2RegistryMixin.MANIFEST_SCHEMA) + + # Verify the layers. + blobs = {} + for layer in manifest_data['fsLayers']: + blob_id = layer['blobSum'] + result = self.conduct('GET', '/v2/%s/%s/blobs/%s' % (namespace, repository, blob_id), + expected_code=200, auth='jwt') + + blobs[blob_id] = result.text + + # Verify the V1 metadata is present for each expected image. + found_v1_layers = set() + history = manifest_data['history'] + for entry in history: + v1_history = json.loads(entry['v1Compatibility']) + found_v1_layers.add(v1_history['id']) + + for image in images: + self.assertIn(image['id'], found_v1_layers) + + return blobs + + +class RegistryTestsMixin(object): def test_pull_publicrepo_anonymous(self): # Add a new repository under the public user, so we have a real repository to pull. - images = [{ - 'id': 'onlyimagehere' - }] - self.do_push('public', 'newrepo', 'public', 'password', images) + self.do_push('public', 'newrepo', 'public', 'password') self.clearSession() # First try to pull the (currently private) repo anonymously, which should fail (since it is @@ -291,10 +555,7 @@ class RegistryTests(RegistryTestCase): def test_pull_publicrepo_devtable(self): # Add a new repository under the public user, so we have a real repository to pull. - images = [{ - 'id': 'onlyimagehere' - }] - self.do_push('public', 'newrepo', 'public', 'password', images) + self.do_push('public', 'newrepo', 'public', 'password') self.clearSession() # First try to pull the (currently private) repo as devtable, which should fail as it belongs @@ -312,10 +573,7 @@ class RegistryTests(RegistryTestCase): def test_pull_private_repo(self): # Add a new repository under the devtable user, so we have a real repository to pull. - images = [{ - 'id': 'onlyimagehere' - }] - self.do_push('devtable', 'newrepo', 'devtable', 'password', images) + self.do_push('devtable', 'newrepo', 'devtable', 'password') self.clearSession() # First try to pull the (currently private) repo as public, which should fail as it belongs @@ -331,10 +589,7 @@ class RegistryTests(RegistryTestCase): # Turn off anonymous access. with TestFeature(self, 'ANONYMOUS_ACCESS', False): # Add a new repository under the public user, so we have a real repository to pull. - images = [{ - 'id': 'onlyimagehere' - }] - self.do_push('public', 'newrepo', 'public', 'password', images) + self.do_push('public', 'newrepo', 'public', 'password') self.clearSession() # First try to pull the (currently private) repo as devtable, which should fail as it belongs @@ -354,10 +609,7 @@ class RegistryTests(RegistryTestCase): # Turn off anonymous access. with TestFeature(self, 'ANONYMOUS_ACCESS', False): # Add a new repository under the public user, so we have a real repository to pull. - images = [{ - 'id': 'onlyimagehere' - }] - self.do_push('public', 'newrepo', 'public', 'password', images) + self.do_push('public', 'newrepo', 'public', 'password') self.clearSession() # First try to pull the (currently private) repo as devtable, which should fail as it belongs @@ -372,10 +624,7 @@ class RegistryTests(RegistryTestCase): # Turn off anonymous access. with TestFeature(self, 'ANONYMOUS_ACCESS', False): # Add a new repository under the public user, so we have a real repository to pull. 
- images = [{ - 'id': 'onlyimagehere' - }] - self.do_push('public', 'newrepo', 'public', 'password', images) + self.do_push('public', 'newrepo', 'public', 'password') self.clearSession() # First try to pull the (currently private) repo as anonymous, which should fail as it @@ -399,10 +648,7 @@ class RegistryTests(RegistryTestCase): def test_create_repo_creator_user(self): - images = [{ - 'id': 'onlyimagehere' - }] - self.do_push('buynlarge', 'newrepo', 'creator', 'password', images) + self.do_push('buynlarge', 'newrepo', 'creator', 'password') # Pull the repository as devtable, which should succeed because the repository is owned by the # org. @@ -415,10 +661,7 @@ class RegistryTests(RegistryTestCase): resp = self.conduct('GET', '/api/v1/organization/buynlarge/robots/ownerbot') robot_token = json.loads(resp.text)['token'] - images = [{ - 'id': 'onlyimagehere' - }] - self.do_push('buynlarge', 'newrepo', 'buynlarge+ownerbot', robot_token, images) + self.do_push('buynlarge', 'newrepo', 'buynlarge+ownerbot', robot_token) # Pull the repository as devtable, which should succeed because the repository is owned by the # org. @@ -431,16 +674,23 @@ class RegistryTests(RegistryTestCase): resp = self.conduct('GET', '/api/v1/organization/buynlarge/robots/creatorbot') robot_token = json.loads(resp.text)['token'] - images = [{ - 'id': 'onlyimagehere' - }] - self.do_push('buynlarge', 'newrepo', 'buynlarge+creatorbot', robot_token, images) + self.do_push('buynlarge', 'newrepo', 'buynlarge+creatorbot', robot_token) # Pull the repository as devtable, which should succeed because the repository is owned by the # org. self.do_pull('buynlarge', 'newrepo', 'devtable', 'password') +class V1RegistryTests(V1RegistryPullMixin, V1RegistryPushMixin, RegistryTestsMixin, + RegistryTestCaseMixin, LiveServerTestCase): + """ Tests for V1 registry. """ + def test_push_reponame_with_slashes(self): + # Attempt to add a repository name with slashes. This should fail as we do not support it. + images = [{ + 'id': 'onlyimagehere' + }] + self.do_push('public', 'newrepo/somesubrepo', 'public', 'password', images, expected_code=400) + def test_push_unicode_metadata(self): self.conduct_api_login('devtable', 'password') @@ -450,8 +700,7 @@ class RegistryTests(RegistryTestCase): }] self.do_push('devtable', 'unicodetest', 'devtable', 'password', images) - self.do_pull('devtable', 'unicodetest', 'devtable', 'password') - + self.do_pull('devtable', 'unicodetest', 'devtable', 'password', images=images) def test_tag_validation(self): image_id = 'onlyimagehere' @@ -467,5 +716,209 @@ class RegistryTests(RegistryTestCase): self.do_tag('public', 'newrepo', '-fail', image_id, expected_code=400) +class V2RegistryTests(V2RegistryPullMixin, V2RegistryPushMixin, RegistryTestsMixin, + RegistryTestCaseMixin, LiveServerTestCase): + """ Tests for V2 registry. """ + + def test_push_reponame_with_slashes(self): + # Attempt to add a repository name with slashes. This should fail as we do not support it. + images = [{ + 'id': 'onlyimagehere' + }] + self.do_push('public', 'newrepo/somesubrepo', 'devtable', 'password', images, + expected_auth_code=400) + + def test_invalid_push(self): + self.do_push('devtable', 'newrepo', 'devtable', 'password', invalid=True) + + def test_cancel_push(self): + self.do_push('devtable', 'newrepo', 'devtable', 'password', cancel=True) + + + def test_pull_by_checksum(self): + # Add a new repository under the user, so we have a real repository to pull. 
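One structural note (not part of the patch): RegistryTestsMixin only ever calls self.do_push and self.do_pull, so the concrete V1/V2 test classes in this file choose the protocol per direction simply by which mixin supplies each method. A stripped-down illustration of the same composition:

class SharedTests(object):
  """ Protocol-agnostic tests; relies on a do_push/do_pull pair being mixed in. """
  def roundtrip(self):
    return self.do_push(), self.do_pull()

class V1PushMixin(object):
  def do_push(self):
    return 'pushed via v1'

class V2PullMixin(object):
  def do_pull(self):
    return 'pulled via v2'

class V1PushV2Pull(V2PullMixin, V1PushMixin, SharedTests):
  pass

assert V1PushV2Pull().roundtrip() == ('pushed via v1', 'pulled via v2')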
+ _, digest = self.do_push('devtable', 'newrepo', 'devtable', 'password') + + # Attempt to pull by digest. + self.do_pull('devtable', 'newrepo', 'devtable', 'password', manifest_id=digest) + + + def test_pull_invalid_image_tag(self): + # Add a new repository under the user, so we have a real repository to pull. + self.do_push('devtable', 'newrepo', 'devtable', 'password') + self.clearSession() + + # Attempt to pull the invalid tag. + self.do_pull('devtable', 'newrepo', 'devtable', 'password', manifest_id='invalid', + expected_manifest_code=404) + + + def test_partial_upload_below_5mb(self): + chunksize = 1024 * 1024 * 2 + size = chunksize * 3 + contents = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(size)) + + chunk_count = int(math.ceil((len(contents) * 1.0) / chunksize)) + chunks = [(index * chunksize, (index + 1)*chunksize) for index in range(chunk_count)] + + images = [ + { + 'id':'someid', + 'contents': contents, + 'chunks': chunks + } + ] + + # Push the chunked upload. + self.do_push('devtable', 'newrepo', 'devtable', 'password', images=images) + + # Pull the image back and verify the contents. + blobs = self.do_pull('devtable', 'newrepo', 'devtable', 'password', images=images) + self.assertEquals(len(blobs.items()), 1) + self.assertEquals(blobs.items()[0][1], contents) + + def test_partial_upload_way_below_5mb(self): + size = 1024 + contents = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(size)) + chunks = [(0, 100), (100, size)] + + images = [ + { + 'id':'someid', + 'contents': contents, + 'chunks': chunks + } + ] + + # Push the chunked upload. + self.do_push('devtable', 'newrepo', 'devtable', 'password', images=images) + + # Pull the image back and verify the contents. + blobs = self.do_pull('devtable', 'newrepo', 'devtable', 'password', images=images) + self.assertEquals(len(blobs.items()), 1) + self.assertEquals(blobs.items()[0][1], contents) + + def test_partial_upload_resend_below_5mb(self): + size = 150 + contents = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(size)) + + chunks = [(0, 100), (10, size)] + + images = [ + { + 'id':'someid', + 'contents': contents, + 'chunks': chunks + } + ] + + # Push the chunked upload. + self.do_push('devtable', 'newrepo', 'devtable', 'password', images=images) + + # Pull the image back and verify the contents. + blobs = self.do_pull('devtable', 'newrepo', 'devtable', 'password', images=images) + self.assertEquals(len(blobs.items()), 1) + self.assertEquals(blobs.items()[0][1], contents) + + def test_partial_upload_try_resend_with_gap(self): + size = 150 + contents = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(size)) + + chunks = [(0, 100), (101, size, 416)] + + images = [ + { + 'id':'someid', + 'contents': contents, + 'chunks': chunks + } + ] + + # Attempt to push the chunked upload, which should fail. + self.do_push('devtable', 'newrepo', 'devtable', 'password', images=images) + + def test_multiple_layers_invalid(self): + # Attempt to push a manifest with an image depending on an unknown base layer. + images = [ + { + 'id': 'latestid', + 'contents': 'the latest image', + 'parent': 'baseid', + } + ] + + self.do_push('devtable', 'newrepo', 'devtable', 'password', images=images, + expected_manifest_code=400) + + def test_multiple_layers(self): + # Push a manifest with multiple layers. 
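For clarity (not part of the patch), the chunk tuples the partial-upload tests above pass around are just (start_byte, end_byte) slices of the layer contents, with an optional third element overriding the expected HTTP status. The arithmetic reduces to:

import math

chunksize = 1024 * 1024 * 2
size = chunksize * 3
chunk_count = int(math.ceil((size * 1.0) / chunksize))
chunks = [(index * chunksize, (index + 1) * chunksize) for index in range(chunk_count)]

assert chunks == [(0, chunksize), (chunksize, chunksize * 2), (chunksize * 2, chunksize * 3)]

# A gap between chunks, e.g. (0, 100) followed by (101, 150), is expected to be rejected
# with a 416, which is what the optional third tuple element encodes: (101, 150, 416).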
+ images = [ + { + 'id': 'latestid', + 'contents': 'the latest image', + 'parent': 'baseid', + }, + { + 'id': 'baseid', + 'contents': 'The base image', + } + ] + + self.do_push('devtable', 'newrepo', 'devtable', 'password', images=images) + + def test_invalid_regname(self): + self.do_push('devtable', 'this/is/a/repo', 'devtable', 'password', expected_auth_code=400) + + def test_multiple_tags(self): + latest_images = [ + { + 'id': 'latestid', + 'contents': 'the latest image' + } + ] + + foobar_images = [ + { + 'id': 'foobarid', + 'contents': 'the foobar image', + } + ] + + # Create the repo. + self.do_push('devtable', 'newrepo', 'devtable', 'password', images=latest_images, + tag_name='latest') + + self.do_push('devtable', 'newrepo', 'devtable', 'password', images=foobar_images, + tag_name='foobar') + + # Retrieve the tags. + response = self.conduct('GET', '/v2/devtable/newrepo/tags/list', auth='jwt', expected_code=200) + data = json.loads(response.text) + self.assertEquals(data['name'], "devtable/newrepo") + self.assertIn('latest', data['tags']) + self.assertIn('foobar', data['tags']) + + # Retrieve the tags with pagination. + response = self.conduct('GET', '/v2/devtable/newrepo/tags/list', auth='jwt', + params=dict(n=1), expected_code=200) + + data = json.loads(response.text) + self.assertEquals(data['name'], "devtable/newrepo") + self.assertEquals(len(data['tags']), 1) + self.assertTrue(response.headers['Link'].find('n=1&last=2') > 0) + + # Try to get tags before a repo exists. + self.conduct('GET', '/v2/devtable/doesnotexist/tags/list', auth='jwt', expected_code=401) + + + +class V1PushV2PullRegistryTests(V2RegistryPullMixin, V1RegistryPushMixin, RegistryTestsMixin, + RegistryTestCaseMixin, LiveServerTestCase): + """ Tests for V1 push, V2 pull registry. """ + +class V1PullV2PushRegistryTests(V1RegistryPullMixin, V2RegistryPushMixin, RegistryTestsMixin, + RegistryTestCaseMixin, LiveServerTestCase): + """ Tests for V1 pull, V2 push registry. 
""" + if __name__ == '__main__': unittest.main() diff --git a/test/specs.py b/test/specs.py index 3e5b4e585..4552f5eac 100644 --- a/test/specs.py +++ b/test/specs.py @@ -1,22 +1,33 @@ import json +import hashlib from flask import url_for from uuid import uuid4 from base64 import b64encode +from util.names import parse_namespace_repository NO_REPO = None -PUBLIC_REPO = 'public/publicrepo' -PRIVATE_REPO = 'devtable/shared' +PUBLIC = 'public' +PUBLIC_REPO_NAME = 'publicrepo' +PUBLIC_REPO = PUBLIC + '/' + PUBLIC_REPO_NAME + +PRIVATE = 'devtable' +PRIVATE_REPO_NAME = 'shared' +PRIVATE_REPO = PRIVATE + '/' + PRIVATE_REPO_NAME ORG = 'buynlarge' ORG_REPO = ORG + '/orgrepo' +ORG_REPO_NAME = 'orgrepo' ORG_READERS = 'readers' ORG_OWNER = 'devtable' ORG_OWNERS = 'owners' ORG_READERS = 'readers' +FAKE_MANIFEST = 'unknown_tag' +FAKE_DIGEST = 'sha256:' + hashlib.sha256(str(uuid4())).hexdigest() FAKE_IMAGE_ID = str(uuid4()) +FAKE_UPLOAD_ID = str(uuid4()) FAKE_TAG_NAME = str(uuid4()) FAKE_USERNAME = str(uuid4()) FAKE_TOKEN = str(uuid4()) @@ -72,7 +83,7 @@ UPDATE_REPO_DETAILS = { } -class IndexTestSpec(object): +class IndexV1TestSpec(object): def __init__(self, url, sess_repo=None, anon_code=403, no_access_code=403, read_code=200, admin_code=200): self._url = url @@ -103,132 +114,347 @@ class IndexTestSpec(object): 'method': self._method } - if self._data or self._method == 'POST' or self._method == 'PUT': + if self._data or self._method == 'POST' or self._method == 'PUT' or self._method == 'PATCH': kwargs['data'] = self._data if self._data else '{}' kwargs['content_type'] = 'application/json' return self._url, kwargs -def build_index_specs(): +def build_v1_index_specs(): return [ - IndexTestSpec(url_for('v1.get_image_layer', image_id=FAKE_IMAGE_ID), + IndexV1TestSpec(url_for('v1.get_image_layer', image_id=FAKE_IMAGE_ID), PUBLIC_REPO, 404, 404, 404, 404), - IndexTestSpec(url_for('v1.get_image_layer', image_id=FAKE_IMAGE_ID), + IndexV1TestSpec(url_for('v1.get_image_layer', image_id=FAKE_IMAGE_ID), PRIVATE_REPO, 403, 403, 404, 404), - IndexTestSpec(url_for('v1.get_image_layer', image_id=FAKE_IMAGE_ID), + IndexV1TestSpec(url_for('v1.get_image_layer', image_id=FAKE_IMAGE_ID), ORG_REPO, 403, 403, 404, 404), - IndexTestSpec(url_for('v1.put_image_layer', image_id=FAKE_IMAGE_ID), + IndexV1TestSpec(url_for('v1.put_image_layer', image_id=FAKE_IMAGE_ID), PUBLIC_REPO, 403, 403, 403, 403).set_method('PUT'), - IndexTestSpec(url_for('v1.put_image_layer', image_id=FAKE_IMAGE_ID), + IndexV1TestSpec(url_for('v1.put_image_layer', image_id=FAKE_IMAGE_ID), PRIVATE_REPO, 403, 403, 403, 404).set_method('PUT'), - IndexTestSpec(url_for('v1.put_image_layer', image_id=FAKE_IMAGE_ID), + IndexV1TestSpec(url_for('v1.put_image_layer', image_id=FAKE_IMAGE_ID), ORG_REPO, 403, 403, 403, 404).set_method('PUT'), - IndexTestSpec(url_for('v1.put_image_checksum', + IndexV1TestSpec(url_for('v1.put_image_checksum', image_id=FAKE_IMAGE_ID), PUBLIC_REPO, 403, 403, 403, 403).set_method('PUT'), - IndexTestSpec(url_for('v1.put_image_checksum', + IndexV1TestSpec(url_for('v1.put_image_checksum', image_id=FAKE_IMAGE_ID), PRIVATE_REPO, 403, 403, 403, 400).set_method('PUT'), - IndexTestSpec(url_for('v1.put_image_checksum', + IndexV1TestSpec(url_for('v1.put_image_checksum', image_id=FAKE_IMAGE_ID), ORG_REPO, 403, 403, 403, 400).set_method('PUT'), - IndexTestSpec(url_for('v1.get_image_json', image_id=FAKE_IMAGE_ID), + IndexV1TestSpec(url_for('v1.get_image_json', image_id=FAKE_IMAGE_ID), PUBLIC_REPO, 404, 404, 404, 404), - 
IndexTestSpec(url_for('v1.get_image_json', image_id=FAKE_IMAGE_ID), + IndexV1TestSpec(url_for('v1.get_image_json', image_id=FAKE_IMAGE_ID), PRIVATE_REPO, 403, 403, 404, 404), - IndexTestSpec(url_for('v1.get_image_json', image_id=FAKE_IMAGE_ID), + IndexV1TestSpec(url_for('v1.get_image_json', image_id=FAKE_IMAGE_ID), ORG_REPO, 403, 403, 404, 404), - IndexTestSpec(url_for('v1.get_image_ancestry', - image_id=FAKE_IMAGE_ID), + IndexV1TestSpec(url_for('v1.get_image_ancestry', image_id=FAKE_IMAGE_ID), PUBLIC_REPO, 404, 404, 404, 404), - IndexTestSpec(url_for('v1.get_image_ancestry', - image_id=FAKE_IMAGE_ID), + IndexV1TestSpec(url_for('v1.get_image_ancestry', image_id=FAKE_IMAGE_ID), PRIVATE_REPO, 403, 403, 404, 404), - IndexTestSpec(url_for('v1.get_image_ancestry', - image_id=FAKE_IMAGE_ID), + IndexV1TestSpec(url_for('v1.get_image_ancestry', image_id=FAKE_IMAGE_ID), ORG_REPO, 403, 403, 404, 404), - IndexTestSpec(url_for('v1.put_image_json', image_id=FAKE_IMAGE_ID), + IndexV1TestSpec(url_for('v1.put_image_json', image_id=FAKE_IMAGE_ID), PUBLIC_REPO, 403, 403, 403, 403).set_method('PUT'), - IndexTestSpec(url_for('v1.put_image_json', image_id=FAKE_IMAGE_ID), + IndexV1TestSpec(url_for('v1.put_image_json', image_id=FAKE_IMAGE_ID), PRIVATE_REPO, 403, 403, 403, 400).set_method('PUT'), - IndexTestSpec(url_for('v1.put_image_json', image_id=FAKE_IMAGE_ID), + IndexV1TestSpec(url_for('v1.put_image_json', image_id=FAKE_IMAGE_ID), ORG_REPO, 403, 403, 403, 400).set_method('PUT'), - IndexTestSpec(url_for('v1.create_user'), NO_REPO, 400, 400, 400, + IndexV1TestSpec(url_for('v1.create_user'), NO_REPO, 400, 400, 400, 400).set_method('POST').set_data_from_obj(NEW_USER_DETAILS), - IndexTestSpec(url_for('v1.get_user'), NO_REPO, 404, 200, 200, 200), + IndexV1TestSpec(url_for('v1.get_user'), NO_REPO, 404, 200, 200, 200), - IndexTestSpec(url_for('v1.update_user', username=FAKE_USERNAME), + IndexV1TestSpec(url_for('v1.update_user', username=FAKE_USERNAME), NO_REPO, 403, 403, 403, 403).set_method('PUT'), - IndexTestSpec(url_for('v1.create_repository', repository=PUBLIC_REPO), + IndexV1TestSpec(url_for('v1.create_repository', repository=PUBLIC_REPO), NO_REPO, 403, 403, 403, 403).set_method('PUT'), - IndexTestSpec(url_for('v1.create_repository', repository=PRIVATE_REPO), + IndexV1TestSpec(url_for('v1.create_repository', repository=PRIVATE_REPO), NO_REPO, 403, 403, 403, 201).set_method('PUT'), - IndexTestSpec(url_for('v1.create_repository', repository=ORG_REPO), + IndexV1TestSpec(url_for('v1.create_repository', repository=ORG_REPO), NO_REPO, 403, 403, 403, 201).set_method('PUT'), - IndexTestSpec(url_for('v1.update_images', repository=PUBLIC_REPO), + IndexV1TestSpec(url_for('v1.update_images', repository=PUBLIC_REPO), NO_REPO, 403, 403, 403, 403).set_method('PUT'), - IndexTestSpec(url_for('v1.update_images', repository=PRIVATE_REPO), + IndexV1TestSpec(url_for('v1.update_images', repository=PRIVATE_REPO), NO_REPO, 403, 403, 403, 204).set_method('PUT'), - IndexTestSpec(url_for('v1.update_images', repository=ORG_REPO), NO_REPO, + IndexV1TestSpec(url_for('v1.update_images', repository=ORG_REPO), NO_REPO, 403, 403, 403, 204).set_method('PUT'), - IndexTestSpec(url_for('v1.get_repository_images', + IndexV1TestSpec(url_for('v1.get_repository_images', repository=PUBLIC_REPO), NO_REPO, 200, 200, 200, 200), - IndexTestSpec(url_for('v1.get_repository_images', + IndexV1TestSpec(url_for('v1.get_repository_images', repository=PRIVATE_REPO)), - IndexTestSpec(url_for('v1.get_repository_images', repository=ORG_REPO)), + 
IndexV1TestSpec(url_for('v1.get_repository_images', repository=ORG_REPO)), - IndexTestSpec(url_for('v1.delete_repository_images', + IndexV1TestSpec(url_for('v1.delete_repository_images', repository=PUBLIC_REPO), NO_REPO, 501, 501, 501, 501).set_method('DELETE'), - IndexTestSpec(url_for('v1.put_repository_auth', repository=PUBLIC_REPO), + IndexV1TestSpec(url_for('v1.put_repository_auth', repository=PUBLIC_REPO), NO_REPO, 501, 501, 501, 501).set_method('PUT'), - IndexTestSpec(url_for('v1.get_search'), NO_REPO, 200, 200, 200, 200), + IndexV1TestSpec(url_for('v1.get_search'), NO_REPO, 200, 200, 200, 200), - IndexTestSpec(url_for('v1.ping'), NO_REPO, 200, 200, 200, 200), + IndexV1TestSpec(url_for('v1.ping'), NO_REPO, 200, 200, 200, 200), - IndexTestSpec(url_for('v1.get_tags', repository=PUBLIC_REPO), NO_REPO, + IndexV1TestSpec(url_for('v1.get_tags', repository=PUBLIC_REPO), NO_REPO, 200, 200, 200, 200), - IndexTestSpec(url_for('v1.get_tags', repository=PRIVATE_REPO)), - IndexTestSpec(url_for('v1.get_tags', repository=ORG_REPO)), + IndexV1TestSpec(url_for('v1.get_tags', repository=PRIVATE_REPO)), + IndexV1TestSpec(url_for('v1.get_tags', repository=ORG_REPO)), - IndexTestSpec(url_for('v1.get_tag', repository=PUBLIC_REPO, - tag=FAKE_TAG_NAME), NO_REPO, 400, 400, 400, 400), - IndexTestSpec(url_for('v1.get_tag', repository=PRIVATE_REPO, - tag=FAKE_TAG_NAME), NO_REPO, 403, 403, 400, 400), - IndexTestSpec(url_for('v1.get_tag', repository=ORG_REPO, - tag=FAKE_TAG_NAME), NO_REPO, 403, 403, 400, 400), + IndexV1TestSpec(url_for('v1.get_tag', repository=PUBLIC_REPO, + tag=FAKE_TAG_NAME), NO_REPO, 404, 404, 404, 404), + IndexV1TestSpec(url_for('v1.get_tag', repository=PRIVATE_REPO, + tag=FAKE_TAG_NAME), NO_REPO, 403, 403, 404, 404), + IndexV1TestSpec(url_for('v1.get_tag', repository=ORG_REPO, + tag=FAKE_TAG_NAME), NO_REPO, 403, 403, 404, 404), - IndexTestSpec(url_for('v1.put_tag', repository=PUBLIC_REPO, + IndexV1TestSpec(url_for('v1.put_tag', repository=PUBLIC_REPO, tag=FAKE_TAG_NAME), NO_REPO, 403, 403, 403, 403).set_method('PUT'), - IndexTestSpec(url_for('v1.put_tag', repository=PRIVATE_REPO, + IndexV1TestSpec(url_for('v1.put_tag', repository=PRIVATE_REPO, tag=FAKE_TAG_NAME), NO_REPO, 403, 403, 403, 400).set_method('PUT'), - IndexTestSpec(url_for('v1.put_tag', repository=ORG_REPO, + IndexV1TestSpec(url_for('v1.put_tag', repository=ORG_REPO, tag=FAKE_TAG_NAME), NO_REPO, 403, 403, 403, 400).set_method('PUT'), - IndexTestSpec(url_for('v1.delete_tag', repository=PUBLIC_REPO, + IndexV1TestSpec(url_for('v1.delete_tag', repository=PUBLIC_REPO, tag=FAKE_TAG_NAME), NO_REPO, 403, 403, 403, 403).set_method('DELETE'), - IndexTestSpec(url_for('v1.delete_tag', repository=PRIVATE_REPO, + IndexV1TestSpec(url_for('v1.delete_tag', repository=PRIVATE_REPO, tag=FAKE_TAG_NAME), NO_REPO, 403, 403, 403, 400).set_method('DELETE'), - IndexTestSpec(url_for('v1.delete_tag', repository=ORG_REPO, + IndexV1TestSpec(url_for('v1.delete_tag', repository=ORG_REPO, tag=FAKE_TAG_NAME), NO_REPO, 403, 403, 403, 400).set_method('DELETE'), ] + + +class IndexV2TestSpec(object): + def __init__(self, index_name, method_name, repo_name, scope=None, **kwargs): + self.index_name = index_name + self.repo_name = repo_name + self.method_name = method_name + + default_scope = 'push,pull' if method_name != 'GET' and method_name != 'HEAD' else 'pull' + self.scope = scope or default_scope + + self.kwargs = kwargs + + self.auth_no_access_code = 403 + self.auth_read_code = 403 + self.auth_admin_code = 403 + + self.anon_code = 401 + self.no_access_code = 403 
+ self.read_code = 200 + self.admin_code = 200 + + def auth_status(self, auth_no_access_code=403, auth_read_code=200, auth_admin_code=200): + self.auth_no_access_code = auth_no_access_code + self.auth_read_code = auth_read_code + self.auth_admin_code = auth_admin_code + return self + + def request_status(self, anon_code=401, no_access_code=403, read_code=200, admin_code=200): + self.anon_code = anon_code + self.no_access_code = no_access_code + self.read_code = read_code + self.admin_code = admin_code + return self + + def get_url(self): + namespace, repo_name = parse_namespace_repository(self.repo_name) + return url_for(self.index_name, namespace=namespace, repo_name=repo_name, **self.kwargs) + + def gen_basic_auth(self, username, password): + encoded = b64encode('%s:%s' % (username, password)) + return 'basic %s' % encoded + + def get_scope_string(self): + return 'repository:%s:%s' % (self.repo_name, self.scope) + + +def build_v2_index_specs(): + return [ + # v2.list_all_tags + IndexV2TestSpec('v2.list_all_tags', 'GET', PUBLIC_REPO). + auth_status(200, 200, 200). + request_status(200, 200, 200, 200), + + IndexV2TestSpec('v2.list_all_tags', 'GET', PRIVATE_REPO). + auth_status(403, 200, 200). + request_status(401, 401, 200, 200), + + IndexV2TestSpec('v2.list_all_tags', 'GET', ORG_REPO). + auth_status(403, 200, 200). + request_status(401, 401, 200, 200), + + # v2.fetch_manifest_by_tagname + IndexV2TestSpec('v2.fetch_manifest_by_tagname', 'GET', PUBLIC_REPO, manifest_ref=FAKE_MANIFEST). + auth_status(200, 200, 200). + request_status(404, 404, 404, 404), + + IndexV2TestSpec('v2.fetch_manifest_by_tagname', 'GET', PRIVATE_REPO, manifest_ref=FAKE_MANIFEST). + auth_status(403, 200, 200). + request_status(401, 401, 404, 404), + + IndexV2TestSpec('v2.fetch_manifest_by_tagname', 'GET', ORG_REPO, manifest_ref=FAKE_MANIFEST). + auth_status(403, 200, 200). + request_status(401, 401, 404, 404), + + # v2.fetch_manifest_by_digest + IndexV2TestSpec('v2.fetch_manifest_by_digest', 'GET', PUBLIC_REPO, manifest_ref=FAKE_DIGEST). + auth_status(200, 200, 200). + request_status(404, 404, 404, 404), + + IndexV2TestSpec('v2.fetch_manifest_by_digest', 'GET', PRIVATE_REPO, manifest_ref=FAKE_DIGEST). + auth_status(403, 200, 200). + request_status(401, 401, 404, 404), + + IndexV2TestSpec('v2.fetch_manifest_by_digest', 'GET', ORG_REPO, manifest_ref=FAKE_DIGEST). + auth_status(403, 200, 200). + request_status(401, 401, 404, 404), + + # v2.write_manifest_by_tagname + IndexV2TestSpec('v2.write_manifest_by_tagname', 'PUT', PUBLIC_REPO, manifest_ref=FAKE_MANIFEST). + auth_status(403, 403, 403). + request_status(401, 401, 401, 401), + + IndexV2TestSpec('v2.write_manifest_by_tagname', 'PUT', PRIVATE_REPO, manifest_ref=FAKE_MANIFEST). + auth_status(403, 403, 200). + request_status(401, 401, 401, 400), + + IndexV2TestSpec('v2.write_manifest_by_tagname', 'PUT', ORG_REPO, manifest_ref=FAKE_MANIFEST). + auth_status(403, 403, 200). + request_status(401, 401, 401, 400), + + # v2.write_manifest_by_digest + IndexV2TestSpec('v2.write_manifest_by_digest', 'PUT', PUBLIC_REPO, manifest_ref=FAKE_DIGEST). + auth_status(403, 403, 403). + request_status(401, 401, 401, 401), + + IndexV2TestSpec('v2.write_manifest_by_digest', 'PUT', PRIVATE_REPO, manifest_ref=FAKE_DIGEST). + auth_status(403, 403, 200). + request_status(401, 401, 401, 400), + + IndexV2TestSpec('v2.write_manifest_by_digest', 'PUT', ORG_REPO, manifest_ref=FAKE_DIGEST). + auth_status(403, 403, 200). 
+ request_status(401, 401, 401, 400), + + # v2.delete_manifest_by_digest + IndexV2TestSpec('v2.delete_manifest_by_digest', 'DELETE', PUBLIC_REPO, manifest_ref=FAKE_DIGEST). + auth_status(403, 403, 403). + request_status(401, 401, 401, 401), + + IndexV2TestSpec('v2.delete_manifest_by_digest', 'DELETE', PRIVATE_REPO, manifest_ref=FAKE_DIGEST). + auth_status(403, 403, 200). + request_status(401, 401, 401, 404), + + IndexV2TestSpec('v2.delete_manifest_by_digest', 'DELETE', ORG_REPO, manifest_ref=FAKE_DIGEST). + auth_status(403, 403, 200). + request_status(401, 401, 401, 404), + + # v2.check_blob_exists + IndexV2TestSpec('v2.check_blob_exists', 'HEAD', PUBLIC_REPO, digest=FAKE_DIGEST). + auth_status(200, 200, 200). + request_status(404, 404, 404, 404), + + IndexV2TestSpec('v2.check_blob_exists', 'HEAD', PRIVATE_REPO, digest=FAKE_DIGEST). + auth_status(403, 200, 200). + request_status(401, 401, 404, 404), + + IndexV2TestSpec('v2.check_blob_exists', 'HEAD', ORG_REPO, digest=FAKE_DIGEST). + auth_status(403, 200, 200). + request_status(401, 401, 404, 404), + + # v2.download_blob + IndexV2TestSpec('v2.download_blob', 'GET', PUBLIC_REPO, digest=FAKE_DIGEST). + auth_status(200, 200, 200). + request_status(404, 404, 404, 404), + + IndexV2TestSpec('v2.download_blob', 'GET', PRIVATE_REPO, digest=FAKE_DIGEST). + auth_status(403, 200, 200). + request_status(401, 401, 404, 404), + + IndexV2TestSpec('v2.download_blob', 'GET', ORG_REPO, digest=FAKE_DIGEST). + auth_status(403, 200, 200). + request_status(401, 401, 404, 404), + + # v2.start_blob_upload + IndexV2TestSpec('v2.start_blob_upload', 'POST', PUBLIC_REPO). + auth_status(403, 403, 403). + request_status(401, 401, 401, 401), + + IndexV2TestSpec('v2.start_blob_upload', 'POST', PRIVATE_REPO). + auth_status(403, 403, 200). + request_status(401, 401, 401, 202), + + IndexV2TestSpec('v2.start_blob_upload', 'POST', ORG_REPO). + auth_status(403, 403, 200). + request_status(401, 401, 401, 202), + + # v2.fetch_existing_upload + IndexV2TestSpec('v2.fetch_existing_upload', 'GET', PUBLIC_REPO, 'push,pull', upload_uuid=FAKE_UPLOAD_ID). + auth_status(403, 403, 403). + request_status(401, 401, 401, 401), + + IndexV2TestSpec('v2.fetch_existing_upload', 'GET', PRIVATE_REPO, 'push,pull', upload_uuid=FAKE_UPLOAD_ID). + auth_status(403, 403, 200). + request_status(401, 401, 401, 404), + + IndexV2TestSpec('v2.fetch_existing_upload', 'GET', ORG_REPO, 'push,pull', upload_uuid=FAKE_UPLOAD_ID). + auth_status(403, 403, 200). + request_status(401, 401, 401, 404), + + # v2.upload_chunk + IndexV2TestSpec('v2.upload_chunk', 'PATCH', PUBLIC_REPO, upload_uuid=FAKE_UPLOAD_ID). + auth_status(403, 403, 403). + request_status(401, 401, 401, 401), + + IndexV2TestSpec('v2.upload_chunk', 'PATCH', PRIVATE_REPO, upload_uuid=FAKE_UPLOAD_ID). + auth_status(403, 403, 200). + request_status(401, 401, 401, 404), + + IndexV2TestSpec('v2.upload_chunk', 'PATCH', ORG_REPO, upload_uuid=FAKE_UPLOAD_ID). + auth_status(403, 403, 200). + request_status(401, 401, 401, 404), + + # v2.monolithic_upload_or_last_chunk + IndexV2TestSpec('v2.monolithic_upload_or_last_chunk', 'PUT', PUBLIC_REPO, upload_uuid=FAKE_UPLOAD_ID). + auth_status(403, 403, 403). + request_status(401, 401, 401, 401), + + IndexV2TestSpec('v2.monolithic_upload_or_last_chunk', 'PUT', PRIVATE_REPO, upload_uuid=FAKE_UPLOAD_ID). + auth_status(403, 403, 200). + request_status(401, 401, 401, 400), + + IndexV2TestSpec('v2.monolithic_upload_or_last_chunk', 'PUT', ORG_REPO, upload_uuid=FAKE_UPLOAD_ID). + auth_status(403, 403, 200). 
+ request_status(401, 401, 401, 400), + + # v2.cancel_upload + IndexV2TestSpec('v2.cancel_upload', 'DELETE', PUBLIC_REPO, upload_uuid=FAKE_UPLOAD_ID). + auth_status(403, 403, 403). + request_status(401, 401, 401, 401), + + IndexV2TestSpec('v2.cancel_upload', 'DELETE', PRIVATE_REPO, upload_uuid=FAKE_UPLOAD_ID). + auth_status(403, 403, 200). + request_status(401, 401, 401, 404), + + IndexV2TestSpec('v2.cancel_upload', 'DELETE', ORG_REPO, upload_uuid=FAKE_UPLOAD_ID). + auth_status(403, 403, 200). + request_status(401, 401, 401, 404), + ] diff --git a/test/test_digest_tools.py b/test/test_digest_tools.py index 954c01052..8fcb71237 100644 --- a/test/test_digest_tools.py +++ b/test/test_digest_tools.py @@ -1,26 +1,28 @@ import unittest -from digest.digest_tools import parse_digest, content_path, InvalidDigestException +from digest.digest_tools import Digest, content_path, InvalidDigestException class TestParseDigest(unittest.TestCase): def test_parse_good(self): examples = [ - ('tarsum.v123123+sha1:123deadbeef', (True, 'v123123', 'sha1', '123deadbeef')), - ('tarsum.v1+sha256:123123', (True, 'v1', 'sha256', '123123')), - ('tarsum.v0+md5:abc', (True, 'v0', 'md5', 'abc')), - ('sha1:123deadbeef', (False, None, 'sha1', '123deadbeef')), - ('sha256:123123', (False, None, 'sha256', '123123')), - ('md5:abc', (False, None, 'md5', 'abc')), + ('tarsum.v123123+sha1:123deadbeef', ('tarsum.v123123+sha1', '123deadbeef')), + ('tarsum.v1+sha256:123123', ('tarsum.v1+sha256', '123123')), + ('tarsum.v0+md5:abc', ('tarsum.v0+md5', 'abc')), + ('tarsum+sha1:abc', ('tarsum+sha1', 'abc')), + ('sha1:123deadbeef', ('sha1', '123deadbeef')), + ('sha256:123123', ('sha256', '123123')), + ('md5:abc', ('md5', 'abc')), ] - for digest, output in examples: - self.assertEquals(parse_digest(digest), output) + for digest, output_args in examples: + self.assertEquals(Digest.parse_digest(digest), Digest(*output_args)) + + # Test the string method + self.assertEquals(str(Digest.parse_digest(digest)), digest) def test_parse_fail(self): examples = [ - 'tarsum.v++sha1:123deadbeef', - '.v1+sha256:123123', - 'tarsum.v+md5:abc', + 'tarsum.v+md5:abc:', 'sha1:123deadbeefzxczxv', 'sha256123123', 'tarsum.v1+', @@ -29,7 +31,7 @@ class TestParseDigest(unittest.TestCase): for bad_digest in examples: with self.assertRaises(InvalidDigestException): - parse_digest(bad_digest) + Digest.parse_digest(bad_digest) class TestDigestPath(unittest.TestCase): @@ -42,6 +44,8 @@ class TestDigestPath(unittest.TestCase): ('sha256:123123', 'sha256/12/123123'), ('md5:abc', 'md5/ab/abc'), ('md5:1', 'md5/01/1'), + ('md5.....+++:1', 'md5/01/1'), + ('.md5.:1', 'md5/01/1'), ] for digest, path in examples: diff --git a/test/test_filelike.py b/test/test_filelike.py new file mode 100644 index 000000000..98bb02370 --- /dev/null +++ b/test/test_filelike.py @@ -0,0 +1,135 @@ +import unittest + +from StringIO import StringIO +from util.registry.filelike import FilelikeStreamConcat, LimitingStream, StreamSlice + +class TestFilelikeStreamConcat(unittest.TestCase): + def somegenerator(self): + yield 'some' + yield 'cool' + yield 'file-contents' + + def test_parts(self): + gens = iter([StringIO(s) for s in self.somegenerator()]) + fileobj = FilelikeStreamConcat(gens) + + self.assertEquals('so', fileobj.read(2)) + self.assertEquals('mec', fileobj.read(3)) + self.assertEquals('oolfile', fileobj.read(7)) + self.assertEquals('-contents', fileobj.read(-1)) + + def test_entire(self): + gens = iter([StringIO(s) for s in self.somegenerator()]) + fileobj = FilelikeStreamConcat(gens) + 
self.assertEquals('somecoolfile-contents', fileobj.read(-1)) + + +class TestLimitingStream(unittest.TestCase): + def test_nolimit(self): + fileobj = StringIO('this is a cool test') + stream = LimitingStream(fileobj) + self.assertEquals('this is a cool test', stream.read(-1)) + self.assertEquals(stream.tell(), len('this is a cool test')) + + def test_simplelimit(self): + fileobj = StringIO('this is a cool test') + stream = LimitingStream(fileobj, 4) + self.assertEquals('this', stream.read(-1)) + self.assertEquals(stream.tell(), 4) + + def test_simplelimit_readdefined(self): + fileobj = StringIO('this is a cool test') + stream = LimitingStream(fileobj, 4) + self.assertEquals('th', stream.read(2)) + self.assertEquals(stream.tell(), 2) + + def test_nolimit_readdefined(self): + fileobj = StringIO('this is a cool test') + stream = LimitingStream(fileobj, -1) + self.assertEquals('th', stream.read(2)) + self.assertEquals(stream.tell(), 2) + + def test_limit_multiread(self): + fileobj = StringIO('this is a cool test') + stream = LimitingStream(fileobj, 7) + self.assertEquals('this', stream.read(4)) + self.assertEquals(' is', stream.read(3)) + self.assertEquals('', stream.read(2)) + self.assertEquals(stream.tell(), 7) + + def test_limit_multiread2(self): + fileobj = StringIO('this is a cool test') + stream = LimitingStream(fileobj, 7) + self.assertEquals('this', stream.read(4)) + self.assertEquals(' is', stream.read(-1)) + self.assertEquals(stream.tell(), 7) + + def test_seek(self): + fileobj = StringIO('this is a cool test') + stream = LimitingStream(fileobj) + stream.seek(2) + + self.assertEquals('is', stream.read(2)) + self.assertEquals(stream.tell(), 4) + + def test_seek_withlimit(self): + fileobj = StringIO('this is a cool test') + stream = LimitingStream(fileobj, 3) + stream.seek(2) + + self.assertEquals('i', stream.read(2)) + self.assertEquals(stream.tell(), 3) + + def test_seek_pastlimit(self): + fileobj = StringIO('this is a cool test') + stream = LimitingStream(fileobj, 3) + stream.seek(4) + + self.assertEquals('', stream.read(1)) + self.assertEquals(stream.tell(), 3) + + +class TestStreamSlice(unittest.TestCase): + def test_none_read(self): + class NoneReader(object): + def read(self, size=None): + return None + + stream = StreamSlice(NoneReader(), 0) + self.assertEquals(None, stream.read(-1)) + self.assertEquals(0, stream.tell()) + + def test_noslice(self): + fileobj = StringIO('this is a cool test') + stream = StreamSlice(fileobj, 0) + self.assertEquals('this is a cool test', stream.read(-1)) + self.assertEquals(stream.tell(), len('this is a cool test')) + + def test_startindex(self): + fileobj = StringIO('this is a cool test') + stream = StreamSlice(fileobj, 5) + self.assertEquals('is a cool test', stream.read(-1)) + self.assertEquals(stream.tell(), len('is a cool test')) + + def test_startindex_limitedread(self): + fileobj = StringIO('this is a cool test') + stream = StreamSlice(fileobj, 5) + self.assertEquals('is a', stream.read(4)) + self.assertEquals(stream.tell(), 4) + + def test_slice(self): + fileobj = StringIO('this is a cool test') + stream = StreamSlice(fileobj, 5, 9) + self.assertEquals('is a', stream.read(-1)) + self.assertEquals(stream.tell(), len('is a')) + + def test_slice_explictread(self): + fileobj = StringIO('this is a cool test') + stream = StreamSlice(fileobj, 5, 9) + self.assertEquals('is', stream.read(2)) + self.assertEquals(' a', stream.read(5)) + self.assertEquals(stream.tell(), len('is a')) + + +if __name__ == '__main__': + unittest.main() diff --git 
a/test/test_registry_v2_auth.py b/test/test_registry_v2_auth.py new file mode 100644 index 000000000..f449935f3 --- /dev/null +++ b/test/test_registry_v2_auth.py @@ -0,0 +1,185 @@ +import unittest +import time +import jwt + +from cryptography.hazmat.backends import default_backend +from cryptography.hazmat.primitives.asymmetric import rsa + +from app import app +from endpoints.v2.v2auth import (TOKEN_VALIDITY_LIFETIME_S, load_certificate_bytes, + load_private_key, ANONYMOUS_SUB) +from auth.jwt_auth import identity_from_bearer_token, load_public_key, InvalidJWTException +from util.morecollections import AttrDict + + +TEST_AUDIENCE = app.config['SERVER_HOSTNAME'] +TEST_USER = AttrDict({'username': 'joeuser'}) +MAX_SIGNED_S = 3660 + +class TestRegistryV2Auth(unittest.TestCase): + def __init__(self, *args, **kwargs): + super(TestRegistryV2Auth, self).__init__(*args, **kwargs) + self.public_key = None + + def setUp(self): + certificate_file_path = app.config['JWT_AUTH_CERTIFICATE_PATH'] + self.public_key = load_public_key(certificate_file_path) + + def _generate_token_data(self, access=[], audience=TEST_AUDIENCE, user=TEST_USER, iat=None, + exp=None, nbf=None, iss=app.config['JWT_AUTH_TOKEN_ISSUER']): + return { + 'iss': iss, + 'aud': audience, + 'nbf': nbf if nbf is not None else int(time.time()), + 'iat': iat if iat is not None else int(time.time()), + 'exp': exp if exp is not None else int(time.time() + TOKEN_VALIDITY_LIFETIME_S), + 'sub': user.username if user else ANONYMOUS_SUB, + 'access': access, + } + + def _generate_token(self, token_data): + + certificate = load_certificate_bytes(app.config['JWT_AUTH_CERTIFICATE_PATH']) + + token_headers = { + 'x5c': [certificate], + } + + private_key = load_private_key(app.config['JWT_AUTH_PRIVATE_KEY_PATH']) + token_data = jwt.encode(token_data, private_key, 'RS256', headers=token_headers) + return 'Bearer {0}'.format(token_data) + + def _parse_token(self, token): + return identity_from_bearer_token(token, MAX_SIGNED_S, self.public_key) + + def _generate_public_key(self): + key = rsa.generate_private_key( + public_exponent=65537, + key_size=1024, + backend=default_backend() + ) + return key.public_key() + + def test_accepted_token(self): + token = self._generate_token(self._generate_token_data()) + identity = self._parse_token(token) + self.assertEqual(identity.id, TEST_USER.username) + self.assertEqual(0, len(identity.provides)) + + anon_token = self._generate_token(self._generate_token_data(user=None)) + anon_identity = self._parse_token(anon_token) + self.assertEqual(anon_identity.id, ANONYMOUS_SUB) + self.assertEqual(0, len(identity.provides)) + + def test_token_with_access(self): + access = [ + { + 'type': 'repository', + 'name': 'somens/somerepo', + 'actions': ['pull', 'push'], + } + ] + token = self._generate_token(self._generate_token_data(access=access)) + identity = self._parse_token(token) + self.assertEqual(identity.id, TEST_USER.username) + self.assertEqual(1, len(identity.provides)) + + def test_malformed_access(self): + access = [ + { + 'toipe': 'repository', + 'namesies': 'somens/somerepo', + 'akshuns': ['pull', 'push'], + } + ] + token = self._generate_token(self._generate_token_data(access=access)) + with self.assertRaises(InvalidJWTException): + self._parse_token(token) + + def test_bad_signature(self): + token = self._generate_token(self._generate_token_data()) + other_public_key = self._generate_public_key() + with self.assertRaises(InvalidJWTException): + identity_from_bearer_token(token, MAX_SIGNED_S, other_public_key) + + def 
test_audience(self): + token_data = self._generate_token_data(audience='someotherapp') + token = self._generate_token(token_data) + with self.assertRaises(InvalidJWTException): + self._parse_token(token) + + token_data.pop('aud') + no_aud = self._generate_token(token_data) + with self.assertRaises(InvalidJWTException): + self._parse_token(no_aud) + + def test_nbf(self): + future = int(time.time()) + 60 + token_data = self._generate_token_data(nbf=future) + + token = self._generate_token(token_data) + with self.assertRaises(InvalidJWTException): + self._parse_token(token) + + token_data.pop('nbf') + no_nbf_token = self._generate_token(token_data) + with self.assertRaises(InvalidJWTException): + self._parse_token(no_nbf_token) + + def test_iat(self): + future = int(time.time()) + 60 + token_data = self._generate_token_data(iat=future) + + token = self._generate_token(token_data) + with self.assertRaises(InvalidJWTException): + self._parse_token(token) + + token_data.pop('iat') + no_iat_token = self._generate_token(token_data) + with self.assertRaises(InvalidJWTException): + self._parse_token(no_iat_token) + + def test_exp(self): + too_far = int(time.time()) + MAX_SIGNED_S * 2 + token_data = self._generate_token_data(exp=too_far) + + token = self._generate_token(token_data) + with self.assertRaises(InvalidJWTException): + self._parse_token(token) + + past = int(time.time()) - 60 + token_data['exp'] = past + expired_token = self._generate_token(token_data) + with self.assertRaises(InvalidJWTException): + self._parse_token(expired_token) + + token_data.pop('exp') + no_exp_token = self._generate_token(token_data) + with self.assertRaises(InvalidJWTException): + self._parse_token(no_exp_token) + + def test_no_sub(self): + token_data = self._generate_token_data() + token_data.pop('sub') + token = self._generate_token(token_data) + with self.assertRaises(InvalidJWTException): + self._parse_token(token) + + def test_iss(self): + token_data = self._generate_token_data(iss='badissuer') + + token = self._generate_token(token_data) + with self.assertRaises(InvalidJWTException): + self._parse_token(token) + + token_data.pop('iss') + no_iss_token = self._generate_token(token_data) + with self.assertRaises(InvalidJWTException): + self._parse_token(no_iss_token) + + +if __name__ == '__main__': + import logging + logging.basicConfig(level=logging.DEBUG) + unittest.main() + diff --git a/test/test_endpoint_security.py b/test/test_v1_endpoint_security.py similarity index 83% rename from test/test_endpoint_security.py rename to test/test_v1_endpoint_security.py index 27b393cd7..fe88f6d5b 100644 --- a/test/test_endpoint_security.py +++ b/test/test_v1_endpoint_security.py @@ -1,25 +1,26 @@ import unittest +import endpoints.decorated +import json from app import app from util.names import parse_namespace_repository from initdb import setup_database_for_testing, finished_database_for_testing -from specs import build_index_specs +from specs import build_v1_index_specs + from endpoints.v1 import v1_bp - app.register_blueprint(v1_bp, url_prefix='/v1') - NO_ACCESS_USER = 'freshuser' READ_ACCESS_USER = 'reader' ADMIN_ACCESS_USER = 'devtable' class EndpointTestCase(unittest.TestCase): - def setUp(self): + def setUp(self): setup_database_for_testing(self) - def tearDown(self): + def tearDown(self): finished_database_for_testing(self) @@ -68,13 +69,13 @@ class _SpecTestBuilder(type): expected_status = getattr(test_spec, attrs['result_attr']) test = _SpecTestBuilder._test_generator(url, expected_status, - open_kwargs, - 
session_vars) + open_kwargs, + session_vars) test_name_url = url.replace('/', '_').replace('-', '_') sess_repo = str(test_spec.sess_repo).replace('/', '_') - test_name = 'test_%s%s_%s' % (open_kwargs['method'].lower(), - test_name_url, sess_repo) + test_name = 'test_%s%s_%s_%s' % (open_kwargs['method'].lower(), + test_name_url, sess_repo, attrs['result_attr']) attrs[test_name] = test return type(name, bases, attrs) @@ -82,27 +83,31 @@ class _SpecTestBuilder(type): class TestAnonymousAccess(EndpointTestCase): __metaclass__ = _SpecTestBuilder - spec_func = build_index_specs + spec_func = build_v1_index_specs result_attr = 'anon_code' auth_username = None class TestNoAccess(EndpointTestCase): __metaclass__ = _SpecTestBuilder - spec_func = build_index_specs + spec_func = build_v1_index_specs result_attr = 'no_access_code' auth_username = NO_ACCESS_USER class TestReadAccess(EndpointTestCase): __metaclass__ = _SpecTestBuilder - spec_func = build_index_specs + spec_func = build_v1_index_specs result_attr = 'read_code' auth_username = READ_ACCESS_USER class TestAdminAccess(EndpointTestCase): __metaclass__ = _SpecTestBuilder - spec_func = build_index_specs + spec_func = build_v1_index_specs result_attr = 'admin_code' auth_username = ADMIN_ACCESS_USER + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_v2_endpoint_security.py b/test/test_v2_endpoint_security.py new file mode 100644 index 000000000..06d1f025f --- /dev/null +++ b/test/test_v2_endpoint_security.py @@ -0,0 +1,110 @@ +import unittest +import endpoints.decorated +import json + +from app import app +from util.names import parse_namespace_repository +from initdb import setup_database_for_testing, finished_database_for_testing +from specs import build_v2_index_specs +from endpoints.v2 import v2_bp + +app.register_blueprint(v2_bp, url_prefix='/v2') + +NO_ACCESS_USER = 'freshuser' +READ_ACCESS_USER = 'reader' +ADMIN_ACCESS_USER = 'devtable' + + +class EndpointTestCase(unittest.TestCase): + def setUp(self): + setup_database_for_testing(self) + + def tearDown(self): + finished_database_for_testing(self) + + +class _SpecTestBuilder(type): + @staticmethod + def _test_generator(url, test_spec, attrs): + def test(self): + with app.test_client() as c: + headers = [] + expected_index_status = getattr(test_spec, attrs['result_attr']) + + if attrs['auth_username']: + expected_auth_status = getattr(test_spec, 'auth_' + attrs['result_attr']) + + # Get a signed JWT. 
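+          # The /v2/auth endpoint exchanges the basic-auth credentials and the requested service/scope query string for a signed JWT, which is then replayed below as a Bearer authorization header on the actual index request.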
+ username = attrs['auth_username'] + password = 'password' + + jwt_scope = test_spec.get_scope_string() + query_string = 'service=' + app.config['SERVER_HOSTNAME'] + '&scope=' + jwt_scope + + arv = c.open('/v2/auth', + headers=[('authorization', test_spec.gen_basic_auth(username, password))], + query_string=query_string) + + msg = 'Auth failed for %s %s: got %s, expected: %s' % ( + test_spec.method_name, test_spec.index_name, arv.status_code, expected_auth_status) + self.assertEqual(arv.status_code, expected_auth_status, msg) + + if arv.status_code == 200: + headers = [('authorization', 'Bearer ' + json.loads(arv.data)['token'])] + + rv = c.open(url, headers=headers, method=test_spec.method_name) + msg = '%s %s: got %s, expected: %s (auth: %s | headers %s)' % (test_spec.method_name, + test_spec.index_name, rv.status_code, expected_index_status, attrs['auth_username'], + len(headers)) + + self.assertEqual(rv.status_code, expected_index_status, msg) + + return test + + + def __new__(cls, name, bases, attrs): + with app.test_request_context() as ctx: + specs = attrs['spec_func']() + for test_spec in specs: + test_name = '%s_%s_%s_%s_%s' % (test_spec.index_name, test_spec.method_name, + test_spec.repo_name, attrs['auth_username'] or 'anon', + attrs['result_attr']) + test_name = test_name.replace('/', '_').replace('-', '_') + + test_name = 'test_' + test_name.lower().replace('v2.', 'v2_') + url = test_spec.get_url() + attrs[test_name] = _SpecTestBuilder._test_generator(url, test_spec, attrs) + + return type(name, bases, attrs) + + +class TestAnonymousAccess(EndpointTestCase): + __metaclass__ = _SpecTestBuilder + spec_func = build_v2_index_specs + result_attr = 'anon_code' + auth_username = None + + +class TestNoAccess(EndpointTestCase): + __metaclass__ = _SpecTestBuilder + spec_func = build_v2_index_specs + result_attr = 'no_access_code' + auth_username = NO_ACCESS_USER + + +class TestReadAccess(EndpointTestCase): + __metaclass__ = _SpecTestBuilder + spec_func = build_v2_index_specs + result_attr = 'read_code' + auth_username = READ_ACCESS_USER + + +class TestAdminAccess(EndpointTestCase): + __metaclass__ = _SpecTestBuilder + spec_func = build_v2_index_specs + result_attr = 'admin_code' + auth_username = ADMIN_ACCESS_USER + + +if __name__ == '__main__': + unittest.main() diff --git a/tools/auditancestry.py b/tools/auditancestry.py deleted file mode 100644 index 7464e7591..000000000 --- a/tools/auditancestry.py +++ /dev/null @@ -1,104 +0,0 @@ -import logging -import json - -from data.database import Image, ImageStorage, Repository, User, configure -from data import model -from app import app, storage as store - - -logger = logging.getLogger(__name__) -logging.basicConfig(level=logging.DEBUG) - -configure(app.config) - -# Turn off debug logging for boto -logging.getLogger('boto').setLevel(logging.CRITICAL) - - -query = (Image - .select(Image, ImageStorage, Repository, User) - .join(ImageStorage) - .switch(Image) - .join(Repository) - .join(User) - .where(ImageStorage.uploading == False)) - -bad_count = 0 -good_count = 0 - -def resolve_or_create(repo, docker_image_id, new_ancestry): - existing = model.image.get_repo_image_extended(repo.namespace_user.username, repo.name, - docker_image_id) - if existing: - logger.debug('Found existing image: %s, %s', existing.id, docker_image_id) - return existing - else: - # we need to find some storage to link it to - try: - to_link = (ImageStorage - .select() - .join(Image) - .where(Image.docker_image_id == docker_image_id) - .get()) - logger.debug('Linking to 
storage: %s' % to_link.uuid) - created = Image.create(docker_image_id=docker_image_id, repository=repo, - storage=to_link, ancestors=new_ancestry) - logger.debug('Created image: %s' % created) - return created - except ImageStorage.DoesNotExist: - msg = 'No image available anywhere for storage: %s in namespace: %s' - logger.error(msg, docker_image_id, repo.namespace_user.username) - raise RuntimeError() - - -def all_ancestors_exist(ancestors): - if not ancestors: - return True - - found_count = len(list(Image - .select() - .where(Image.id << ancestors))) - return found_count == len(ancestors) - - -cant_fix = [] -for img in query: - try: - with_locations = model.image.get_repo_image_extended(img.repository.namespace_user.username, - img.repository.name, img.docker_image_id) - ancestry_storage = store.image_ancestry_path(img.storage.uuid) - if store.exists(with_locations.storage.locations, ancestry_storage): - full_ancestry = json.loads(store.get_content(with_locations.storage.locations, - ancestry_storage))[1:] - full_ancestry.reverse() - - ancestor_dbids = [int(anc_id) for anc_id in img.ancestors.split('/')[1:-1]] - - if len(full_ancestry) != len(ancestor_dbids) or not all_ancestors_exist(ancestor_dbids): - logger.error('Image has incomplete ancestry: %s, %s, %s, %s', img.id, img.docker_image_id, - full_ancestry, ancestor_dbids) - - fixed_ancestry = '/' - for ancestor in full_ancestry: - ancestor_img = resolve_or_create(img.repository, ancestor, - fixed_ancestry) - fixed_ancestry += str(ancestor_img.id) + '/' - - img.ancestors = fixed_ancestry - img.save() - - bad_count += 1 - else: - good_count += 1 - else: - bad_count += 1 - - except RuntimeError: - cant_fix.append(img) - - logger.debug('Bad: %s Good: %s Can\'t Fix: %s', bad_count, good_count, - len(cant_fix)) - -for cant in cant_fix: - logger.error('Unable to fix %s in repo %s/%s', cant.id, cant.repository.namespace_user.username, - cant.repository.name) diff --git a/tools/migrateimage.py b/tools/migrateimage.py deleted file mode 100644 index 7cc3fbe22..000000000 --- a/tools/migrateimage.py +++ /dev/null @@ -1,66 +0,0 @@ -import argparse -import logging - -from data import model -from data.database import ImageStoragePlacement, ImageStorageLocation -from app import storage - - -logger = logging.getLogger(__name__) - - -PATHSPECS = [ - (storage.image_layer_path, True), - (storage.image_ancestry_path, True), - (storage.image_file_trie_path, False), - (storage.image_file_diffs_path, False), -] - - -def migrate_image(image, destination_location): - logger.debug('Migrating image: %s -> %s', image.docker_image_id, destination_location.name) - destination_location_set = {destination_location.name} - - for path_func, required in PATHSPECS: - path = path_func(image.storage.uuid) - - if storage.exists(image.storage.locations, path): - if not storage.exists(destination_location_set, path): - logger.debug('Migrating path: %s', path) - - with storage.stream_read_file(image.storage.locations, path) as file_to_migrate: - storage.stream_write(destination_location_set, path, file_to_migrate) - else: - logger.debug('File already present in destination: %s', path) - elif required: - raise RuntimeError('Required file not present in image to migrate: %s', path) - - # Successfully migrated, now write the placement - ImageStoragePlacement.create(location=destination_location, storage=image.storage) - -parser = argparse.ArgumentParser(description='Replicate an image storage.') -parser.add_argument('--namespace', type=str, required=True, - help='Namespace for 
the repository containing the image to be replicated') -parser.add_argument('--repository', type=str, required=True, - help='Name for the repository containing the image to be replicated') -parser.add_argument('--imageid', type=str, default=None, - help='Specific image to migrate, entire repo will be migrated if omitted') -parser.add_argument('--to', type=str, required=True, - help='Storage region to which the data should be replicated') - -if __name__ == "__main__": - logging.basicConfig(level=logging.DEBUG) - logging.getLogger('boto').setLevel(logging.CRITICAL) - - args = parser.parse_args() - - location = ImageStorageLocation.get(name=args.to) - - images = [] - if args.imageid is not None: - images = [model.image.get_image_by_id(args.namespace, args.repository, args.imageid)] - else: - images = model.image.get_repository_images(args.namespace, args.repository) - - for img in images: - migrate_image(img, location) diff --git a/util/migrate/uncompressedsize.py b/util/migrate/uncompressedsize.py index ff7de52e0..1a3ceb4a6 100644 --- a/util/migrate/uncompressedsize.py +++ b/util/migrate/uncompressedsize.py @@ -67,7 +67,7 @@ def backfill_sizes_from_data(): decompressor = zlib.decompressobj(ZLIB_GZIP_WINDOW) uncompressed_size = 0 - with store.stream_read_file(with_locs.locations, store.image_layer_path(uuid)) as stream: + with store.stream_read_file(with_locs.locations, store.v1_image_layer_path(uuid)) as stream: while True: current_data = stream.read(CHUNK_SIZE) if len(current_data) == 0: diff --git a/util/registry/filelike.py b/util/registry/filelike.py new file mode 100644 index 000000000..97555a23f --- /dev/null +++ b/util/registry/filelike.py @@ -0,0 +1,160 @@ +WHENCE_ABSOLUTE = 0 +WHENCE_RELATIVE = 1 +WHENCE_RELATIVE_END = 2 + +READ_UNTIL_END = -1 + + +class BaseStreamFilelike(object): + def __init__(self, fileobj): + self._fileobj = fileobj + self._cursor_position = 0 + + def close(self): + self._fileobj.close() + + def read(self, size=READ_UNTIL_END): + buf = self._fileobj.read(size) + if buf is None: + return None + self._cursor_position += len(buf) + return buf + + def tell(self): + return self._cursor_position + + def seek(self, index, whence=WHENCE_ABSOLUTE): + num_bytes_to_ff = 0 + if whence == WHENCE_ABSOLUTE: + if index < self._cursor_position: + raise IOError('Cannot seek backwards') + num_bytes_to_ff = index - self._cursor_position + + elif whence == WHENCE_RELATIVE: + if index < 0: + raise IOError('Cannot seek backwards') + num_bytes_to_ff = index + + elif whence == WHENCE_RELATIVE_END: + raise IOError('Stream does not have a known end point') + + bytes_forward = num_bytes_to_ff + while num_bytes_to_ff > 0: + buf = self._fileobj.read(num_bytes_to_ff) + if not buf: + raise IOError('Seek past end of file') + num_bytes_to_ff -= len(buf) + + self._cursor_position += bytes_forward + return bytes_forward + + +class SocketReader(BaseStreamFilelike): + def __init__(self, fileobj): + super(SocketReader, self).__init__(fileobj) + self.handlers = [] + + def add_handler(self, handler): + self.handlers.append(handler) + + def read(self, size=READ_UNTIL_END): + buf = super(SocketReader, self).read(size) + for handler in self.handlers: + handler(buf) + return buf + + +def wrap_with_handler(in_fp, handler): + wrapper = SocketReader(in_fp) + wrapper.add_handler(handler) + return wrapper + + +class FilelikeStreamConcat(object): + """ A file-like object which concats all the file-like objects in the specified generator into + a single stream. 
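+      Reads are served from the current file; when it is exhausted, it is closed and the next file is pulled from the generator, until the generator raises StopIteration.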
+ """ + def __init__(self, file_generator): + self._file_generator = file_generator + self._current_file = file_generator.next() + self._current_position = 0 + self._closed = False + + def tell(self): + return self._current_position + + def close(self): + self._closed = True + + def read(self, size=READ_UNTIL_END): + buf = '' + current_size = size + + while size == READ_UNTIL_END or len(buf) < size: + current_buf = self._current_file.read(current_size) + if current_buf: + buf += current_buf + self._current_position += len(current_buf) + if size != READ_UNTIL_END: + current_size -= len(current_buf) + + else: + # That file was out of data, prime a new one + self._current_file.close() + try: + self._current_file = self._file_generator.next() + except StopIteration: + return buf + + return buf + + +class StreamSlice(BaseStreamFilelike): + """ A file-like object which returns a file-like object that represents a slice of the data in + the specified file obj. All methods will act as if the slice is its own file. + """ + + def __init__(self, fileobj, start_offset=0, end_offset_exclusive=READ_UNTIL_END): + super(StreamSlice, self).__init__(fileobj) + self._end_offset_exclusive = end_offset_exclusive + self._start_offset = start_offset + + if start_offset > 0: + self.seek(start_offset) + + def read(self, size=READ_UNTIL_END): + if self._end_offset_exclusive == READ_UNTIL_END: + # We weren't asked to limit the end of the stream + return super(StreamSlice, self).read(size) + + # Compute the max bytes to read until the end or until we reach the user requested max + max_bytes_to_read = self._end_offset_exclusive - super(StreamSlice, self).tell() + if size != READ_UNTIL_END: + max_bytes_to_read = min(max_bytes_to_read, size) + + return super(StreamSlice, self).read(max_bytes_to_read) + + def _file_min(self, first, second): + if first == READ_UNTIL_END: + return second + + if second == READ_UNTIL_END: + return first + + return min(first, second) + + def tell(self): + return super(StreamSlice, self).tell() - self._start_offset + + def seek(self, index, whence=WHENCE_ABSOLUTE): + index = self._file_min(self._end_offset_exclusive, index) + super(StreamSlice, self).seek(index, whence) + + +class LimitingStream(StreamSlice): + """ A file-like object which mimics the specified file stream being limited to the given number + of bytes. All calls after that limit (if specified) will act as if the file has no additional + data. + """ + def __init__(self, fileobj, read_limit=READ_UNTIL_END): + super(LimitingStream, self).__init__(fileobj, 0, read_limit) diff --git a/util/registry/gzipwrap.py b/util/registry/gzipwrap.py index 604fa343d..685e5bb13 100644 --- a/util/registry/gzipwrap.py +++ b/util/registry/gzipwrap.py @@ -11,6 +11,9 @@ class GzipWrap(object): self.is_done = False def read(self, size=-1): + if size is None or size < 0: + raise Exception('Call to GzipWrap with unbound size will result in poor performance') + # If the buffer already has enough bytes, then simply pop them off of # the beginning and return them. 
if len(self.buffer) >= size or self.is_done: diff --git a/util/security/strictjwt.py b/util/security/strictjwt.py index 35f94444c..61bb61454 100644 --- a/util/security/strictjwt.py +++ b/util/security/strictjwt.py @@ -1,3 +1,4 @@ +from datetime import datetime, timedelta from jwt import PyJWT from jwt.exceptions import ( InvalidTokenError, DecodeError, InvalidAudienceError, ExpiredSignatureError, @@ -14,8 +15,41 @@ class StrictJWT(PyJWT): 'require_exp': True, 'require_iat': True, 'require_nbf': True, + 'exp_max_s': None, }) return defaults + def _validate_claims(self, payload, options, audience=None, issuer=None, leeway=0, **kwargs): + if options.get('exp_max_s') is not None: + if 'verify_expiration' in kwargs and not kwargs.get('verify_expiration'): + raise ValueError('exp_max_s option implies verify_expiration') + + options['verify_exp'] = True + + # Do all of the other checks + super(StrictJWT, self)._validate_claims(payload, options, audience, issuer, leeway, **kwargs) + + if 'exp' in payload and options.get('exp_max_s') is not None: + # Validate that the expiration was not more than exp_max_s seconds after the issue time + # or in the absence of an issue time, more than exp_max_s in the future from now + + # This will work because the parent method already checked the type of exp + expiration = datetime.utcfromtimestamp(int(payload['exp'])) + max_signed_s = options.get('exp_max_s') + + start_time = datetime.utcnow() + if 'iat' in payload: + start_time = datetime.utcfromtimestamp(int(payload['iat'])) + + if expiration > start_time + timedelta(seconds=max_signed_s): + raise InvalidTokenError('Token was signed for more than %s seconds from %s' % (max_signed_s, + start_time)) + + +def exp_max_s_option(max_exp_s): + return { + 'exp_max_s': max_exp_s, + } + decode = StrictJWT().decode diff --git a/workers/storagereplication.py b/workers/storagereplication.py index 7b0dd4562..54e532fc5 100644 --- a/workers/storagereplication.py +++ b/workers/storagereplication.py @@ -49,8 +49,7 @@ class StorageReplicationWorker(QueueWorker): logger.debug('Copying image storage %s to location %s', partial_storage.uuid, location) # Copy the various paths. - paths = [storage_paths.image_ancestry_path, - storage_paths.image_layer_path] + paths = [storage_paths.v1_image_layer_path] try: for path_builder in paths:
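The new file-like helpers in util/registry/filelike.py above are easiest to read alongside a small usage sketch. The snippet below is illustrative only: the _parts generator, the StringIO buffers, and the asserted values are invented for the example; the classes and wrap_with_handler come straight from the file added in this diff (Python 2, matching the codebase).

from StringIO import StringIO

from util.registry.filelike import (FilelikeStreamConcat, LimitingStream, StreamSlice,
                                    wrap_with_handler)


def _parts():
  # Hypothetical generator yielding file-like pieces of a larger stream.
  yield StringIO('hello ')
  yield StringIO('world')


# Concatenate several file-like objects into one readable stream.
combined = FilelikeStreamConcat(_parts())
assert combined.read() == 'hello world'

# Expose only bytes [6, 11) of an underlying stream as its own file.
sliced = StreamSlice(StringIO('hello world'), 6, 11)
assert sliced.read() == 'world'
assert sliced.tell() == 5

# Cap reads at the first five bytes of the underlying stream.
limited = LimitingStream(StringIO('hello world'), 5)
assert limited.read() == 'hello'

# Observe every chunk read from a stream via a handler callback.
seen = []
watched = wrap_with_handler(StringIO('abc'), seen.append)
assert watched.read() == 'abc'
assert seen == ['abc']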
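For the strictjwt change, here is a minimal sketch of how the new exp_max_s option is meant to behave, assuming an illustrative RSA key pair and made-up claim values (the issuer, audience, subject, and lifetimes below are placeholders, not configuration from the codebase): a token that has not yet expired is still rejected when its exp lies more than exp_max_s seconds after its iat.

import time

import jwt
from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives import serialization
from cryptography.hazmat.primitives.asymmetric import rsa

from util.security import strictjwt

# Illustrative key pair; real callers use the registry's configured certificate and key.
key = rsa.generate_private_key(public_exponent=65537, key_size=2048, backend=default_backend())
private_pem = key.private_bytes(serialization.Encoding.PEM,
                                serialization.PrivateFormat.TraditionalOpenSSL,
                                serialization.NoEncryption())
public_pem = key.public_key().public_bytes(serialization.Encoding.PEM,
                                           serialization.PublicFormat.SubjectPublicKeyInfo)

now = int(time.time())
claims = {
  'iss': 'some-issuer',
  'aud': 'registry.example.com',
  'sub': 'someuser',
  'iat': now,
  'nbf': now,
  'exp': now + 7200,  # signed to be valid for two hours
}
token = jwt.encode(claims, private_pem, 'RS256')

# Accepted: the two-hour signing window fits inside the allowed maximum.
strictjwt.decode(token, public_pem, algorithms=['RS256'], audience='registry.example.com',
                 issuer='some-issuer', options=strictjwt.exp_max_s_option(7200))

# Rejected: exp is more than exp_max_s seconds past iat, even though the token has not expired.
try:
  strictjwt.decode(token, public_pem, algorithms=['RS256'], audience='registry.example.com',
                   issuer='some-issuer', options=strictjwt.exp_max_s_option(3600))
except strictjwt.InvalidTokenError:
  pass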