import hashlib
import json
import logging
import uuid

from flask import redirect, Blueprint, abort, send_file, make_response, request

import features

from app import app, signer, storage, metric_queue, config_provider, ip_resolver, instance_keys
from auth.auth_context import get_authenticated_user
from auth.decorators import process_auth
from auth.permissions import ReadRepositoryPermission
from data import database
from data import model
from data.registry_model import registry_model
from endpoints.decorators import (anon_protect, anon_allowed, route_show_if, parse_repository_name,
                                  check_region_blacklisted)
from endpoints.v2.blob import BLOB_DIGEST_ROUTE
from image.appc import AppCImageFormatter
from image.docker import ManifestException
from image.docker.squashed import SquashedDockerImageFormatter
from storage import Storage
from util.audit import track_and_log, wrap_repository
from util.http import exact_abort
from util.registry.filelike import wrap_with_handler
from util.registry.queuefile import QueueFile
from util.registry.queueprocess import QueueProcess
from util.registry.tarlayerformat import TarLayerFormatterReporter
from util.registry.torrent import (make_torrent, per_user_torrent_filename,
                                   public_torrent_filename, PieceHasher, TorrentConfiguration)


logger = logging.getLogger(__name__)
verbs = Blueprint('verbs', __name__)

# Mimetype used for every derived image streamed back to the client.
LAYER_MIMETYPE = 'binary/octet-stream'


class VerbReporter(TarLayerFormatterReporter):
    """ Reporter which records formatter passes in the metric queue, labelled by verb kind. """

    def __init__(self, kind):
        self.kind = kind

    def report_pass(self, pass_count):
        metric_queue.verb_action_passes.Inc(labelvalues=[self.kind, pass_count])


def _open_stream(formatter, tag, schema1_manifest, derived_image_id, handlers, reporter):
    """
    This method generates a stream of data which will be replicated and read from the queue files.
    This method runs in a separate process.

    Returns the bound `read` method of the (handler-wrapped) stream built by the formatter.
    """
    # For performance reasons, we load the full image list here, cache it, then disconnect from
    # the database.
    with database.UseThenDisconnect(app.config):
        layers = registry_model.list_parsed_manifest_layers(tag.repository, schema1_manifest,
                                                            storage, include_placements=True)

    def image_stream_getter(store, blob):
        # Returns a callable so that the blob is only opened when the formatter asks for it.
        def get_stream_for_storage():
            current_image_stream = store.stream_read_file(blob.placements, blob.storage_path)
            logger.debug('Returning blob %s: %s', blob.digest, blob.storage_path)
            return current_image_stream

        return get_stream_for_storage

    def tar_stream_getter_iterator():
        # Re-Initialize the storage engine because some may not respond well to forking (e.g. S3)
        store = Storage(app, metric_queue, config_provider=config_provider,
                        ip_resolver=ip_resolver)

        # Note: We reverse because we have to start at the leaf layer and move upward,
        # as per the spec for the formatters.
        for layer in reversed(layers):
            yield image_stream_getter(store, layer.blob)

    stream = formatter.build_stream(tag, schema1_manifest, derived_image_id, layers,
                                    tar_stream_getter_iterator, reporter=reporter)

    # Each handler observes the bytes as they are read (e.g. the torrent piece hasher).
    for handler_fn in handlers:
        stream = wrap_with_handler(stream, handler_fn)

    return stream.read


def _sign_derived_image(verb, derived_image, queue_file):
    """
    Read from the queue file and sign the contents which are generated. This method runs in a
    separate process.
    """
    signature = None
    try:
        signature = signer.detached_sign(queue_file)
    except Exception:
        # Signing is best-effort: log and give up, but do not swallow SystemExit or
        # KeyboardInterrupt as a bare `except:` would.
        logger.exception('Exception when signing %s deriving image %s', verb, derived_image)
        return

    # Setup the database (since this is a new process) and then disconnect immediately
    # once the operation completes.
    if not queue_file.raised_exception:
        with database.UseThenDisconnect(app.config):
            registry_model.set_derived_image_signature(derived_image, signer.name, signature)


def _write_derived_image_to_storage(verb, derived_image, queue_file):
    """
    Read from the generated stream and write it back to the storage engine. This method runs in a
    separate process.
    """

    def handle_exception(ex):
        # The build failed part-way: remove the derived image record.
        logger.debug('Exception when building %s derived image %s: %s', verb, derived_image, ex)

        with database.UseThenDisconnect(app.config):
            registry_model.delete_derived_image(derived_image)

    queue_file.add_exception_handler(handle_exception)

    # Re-Initialize the storage engine because some may not respond well to forking (e.g. S3)
    store = Storage(app, metric_queue, config_provider=config_provider, ip_resolver=ip_resolver)

    try:
        store.stream_write(derived_image.blob.placements, derived_image.blob.storage_path,
                           queue_file)
    except IOError as ex:
        logger.debug('Exception when writing %s derived image %s: %s', verb, derived_image, ex)

        with database.UseThenDisconnect(app.config):
            registry_model.delete_derived_image(derived_image)

    queue_file.close()


def _torrent_for_blob(blob, is_public):
    """
    Returns a response containing the torrent file contents for the given blob. May abort
    with an error if the state is not valid (e.g. non-public, non-user request).
    """
    # Make sure the storage has a size.
    if not blob.compressed_size:
        abort(404)

    # Lookup the torrent information for the storage.
    torrent_info = registry_model.get_torrent_info(blob)
    if torrent_info is None:
        abort(404)

    # Lookup the webseed path for the storage.
    webseed = storage.get_direct_download_url(
        blob.placements, blob.storage_path,
        expires_in=app.config['BITTORRENT_WEBSEED_LIFETIME'])
    if webseed is None:
        # We cannot support webseeds for storages that cannot provide direct downloads.
        exact_abort(501, 'Storage engine does not support seeding.')

    # Load the config for building torrents.
    torrent_config = TorrentConfiguration.from_app_config(instance_keys, app.config)

    # Build the filename for the torrent.
    if is_public:
        name = public_torrent_filename(blob.uuid)
    else:
        user = get_authenticated_user()
        if not user:
            abort(403)

        name = per_user_torrent_filename(torrent_config, user.uuid, blob.uuid)

    # Return the torrent file.
    torrent_file = make_torrent(torrent_config, name, webseed, blob.compressed_size,
                                torrent_info.piece_length, torrent_info.pieces)

    headers = {
        'Content-Type': 'application/x-bittorrent',
        'Content-Disposition': 'attachment; filename={0}.torrent'.format(name),
    }

    return make_response(torrent_file, 200, headers)


def _torrent_repo_verb(repository, tag, manifest, verb, **kwargs):
    """ Handles returning a torrent for the given verb on the given image and tag. """
    if not features.BITTORRENT:
        # Torrent feature is not enabled.
        abort(406)

    # Lookup an *existing* derived storage for the verb. If the verb's image storage doesn't exist,
    # we cannot create it here, so we 406.
    derived_image = registry_model.lookup_derived_image(manifest, verb, storage,
                                                        varying_metadata={'tag': tag.name},
                                                        include_placements=True)
    if derived_image is None:
        abort(406)

    # Return the torrent.
    torrent = _torrent_for_blob(derived_image.blob,
                                model.repository.is_repository_public(repository))

    # Log the action.
    track_and_log('repo_verb', wrap_repository(repository), tag=tag.name, verb=verb, torrent=True,
                  **kwargs)
    return torrent


def _verify_repo_verb(_, namespace, repo_name, tag_name, verb, checker=None):
    """
    Verifies that the current user may run the verb against the given repository and tag.

    Aborts with 403 when the repository is neither readable nor public, 405 when it is not an
    image repository, 400 when the namespace is disabled or the manifest cannot be parsed, and
    404 when the repository, tag or manifest is missing or the optional `checker` rejects it.

    Returns a tuple of (tag, manifest, schema1_manifest). The first positional argument is
    unused and kept only for caller compatibility.
    """
    permission = ReadRepositoryPermission(namespace, repo_name)
    repo = model.repository.get_repository(namespace, repo_name)
    repo_is_public = repo is not None and model.repository.is_repository_public(repo)
    if not permission.can() and not repo_is_public:
        logger.debug('No permission to read repository %s/%s for user %s with verb %s', namespace,
                     repo_name, get_authenticated_user(), verb)
        abort(403)

    if repo is not None and repo.kind.name != 'image':
        logger.debug('Repository %s/%s for user %s is not an image repo', namespace, repo_name,
                     get_authenticated_user())
        abort(405)

    # Make sure the repo's namespace isn't disabled.
    if not registry_model.is_namespace_enabled(namespace):
        abort(400)

    # Lookup the requested tag.
    repo_ref = registry_model.lookup_repository(namespace, repo_name)
    if repo_ref is None:
        abort(404)

    tag = registry_model.get_repo_tag(repo_ref, tag_name)
    if tag is None:
        # Log the requested name: `tag` itself is always None on this branch.
        logger.debug('Tag %s does not exist in repository %s/%s for user %s', tag_name, namespace,
                     repo_name, get_authenticated_user())
        abort(404)

    # Get its associated manifest.
    manifest = registry_model.get_manifest_for_tag(tag, backfill_if_necessary=True)
    if manifest is None:
        logger.debug('Could not get manifest on %s/%s:%s::%s', namespace, repo_name, tag.name,
                     verb)
        abort(404)

    # Retrieve the schema1-compatible version of the manifest.
    try:
        schema1_manifest = registry_model.get_schema1_parsed_manifest(manifest, namespace,
                                                                      repo_name, tag.name,
                                                                      storage)
    except ManifestException:
        logger.exception('Could not get manifest on %s/%s:%s::%s', namespace, repo_name,
                         tag.name, verb)
        abort(400)

    if schema1_manifest is None:
        abort(404)

    # If there is a data checker, call it first.
    if checker is not None:
        if not checker(tag, schema1_manifest):
            logger.debug('Check mismatch on %s/%s:%s, verb %s', namespace, repo_name, tag.name,
                         verb)
            abort(404)

    # Preload the tag's repository information, so it gets cached.
    assert tag.repository.namespace_name
    assert tag.repository.name

    return tag, manifest, schema1_manifest


def _repo_verb_signature(namespace, repository, tag_name, verb, checker=None, **kwargs):
    """
    Returns a response holding the signature of the derived image for the verb.

    Responds with an empty 202 while the derived image does not exist or is still uploading, and
    aborts with 404 when no signer is configured or no signature has been stored.
    """
    # Verify that the tag exists and that we have access to it.
    tag, manifest, _ = _verify_repo_verb(storage, namespace, repository, tag_name, verb, checker)

    # Find the derived image storage for the verb.
    derived_image = registry_model.lookup_derived_image(manifest, verb, storage,
                                                        varying_metadata={'tag': tag.name})

    if derived_image is None or derived_image.blob.uploading:
        return make_response('', 202)

    # Check if we have a valid signer configured.
    if not signer.name:
        abort(404)

    # Lookup the signature for the verb.
    signature_value = registry_model.get_derived_image_signature(derived_image, signer.name)
    if signature_value is None:
        abort(404)

    # Return the signature.
    return make_response(signature_value)


@check_region_blacklisted()
def _repo_verb(namespace, repository, tag_name, verb, formatter, sign=False, checker=None,
               **kwargs):
    """
    Serves the derived image produced by `formatter` for the given verb and tag.

    If the client asks for 'application/x-bittorrent', a torrent for the existing derived image
    is returned instead. If the derived image is already fully stored, the client is redirected
    to a direct download URL when the storage provides one, or the stored file is streamed.
    Otherwise the image is built in a separate process and streamed to the client while (when
    the registry is not read-only) also being written to storage and optionally signed.
    """
    # Verify that the image exists and that we have access to it.
    logger.debug('Verifying repo verb %s for repository %s/%s with user %s with mimetype %s',
                 verb, namespace, repository, get_authenticated_user(),
                 request.accept_mimetypes.best)
    tag, manifest, schema1_manifest = _verify_repo_verb(storage, namespace, repository,
                                                        tag_name, verb, checker)

    # Load the repository for later.
    repo = model.repository.get_repository(namespace, repository)
    if repo is None:
        abort(404)

    # Check for torrent. If found, we return a torrent for the repo verb image (if the derived
    # image already exists).
    if request.accept_mimetypes.best == 'application/x-bittorrent':
        metric_queue.repository_pull.Inc(
            labelvalues=[namespace, repository, verb + '+torrent', True])
        return _torrent_repo_verb(repo, tag, manifest, verb, **kwargs)

    # Log the action.
    track_and_log('repo_verb', wrap_repository(repo), tag=tag.name, verb=verb, **kwargs)
    metric_queue.repository_pull.Inc(labelvalues=[namespace, repository, verb, True])

    is_readonly = app.config.get('REGISTRY_STATE', 'normal') == 'readonly'

    # Lookup/create the derived image for the verb and repo image.
    if is_readonly:
        # In read-only mode nothing can be created; a missing derived image is built on the fly
        # below and streamed to the client without being persisted.
        derived_image = registry_model.lookup_derived_image(
            manifest, verb, storage, varying_metadata={'tag': tag.name},
            include_placements=True)
    else:
        derived_image = registry_model.lookup_or_create_derived_image(
            manifest, verb, storage.preferred_locations[0], storage,
            varying_metadata={'tag': tag.name}, include_placements=True)
        if derived_image is None:
            logger.error('Could not create or lookup a derived image for manifest %s', manifest)
            abort(400)

    if derived_image is not None and not derived_image.blob.uploading:
        logger.debug('Derived %s image %s exists in storage', verb, derived_image)
        is_head_request = request.method == 'HEAD'

        metric_queue.pull_byte_count.Inc(derived_image.blob.compressed_size, labelvalues=[verb])

        download_url = storage.get_direct_download_url(derived_image.blob.placements,
                                                       derived_image.blob.storage_path,
                                                       head=is_head_request)
        if download_url:
            logger.debug('Redirecting to download URL for derived %s image %s', verb,
                         derived_image)
            return redirect(download_url)

        # Close the database handle here for this process before we send the long download.
        database.close_db_filter(None)

        logger.debug('Sending cached derived %s image %s', verb, derived_image)
        return send_file(
            storage.stream_read_file(derived_image.blob.placements,
                                     derived_image.blob.storage_path),
            mimetype=LAYER_MIMETYPE)

    logger.debug('Building and returning derived %s image', verb)

    # Close the database connection before any process forking occurs. This is important because
    # the Postgres driver does not react kindly to forking, so we need to make sure it is closed
    # so that each process will get its own unique connection.
    database.close_db_filter(None)

    def _cleanup():
        # Close any existing DB connection once the process has exited.
        database.close_db_filter(None)

    hasher = PieceHasher(app.config['BITTORRENT_PIECE_SIZE'])

    def _store_metadata_and_cleanup():
        # Nothing may be written while the registry is read-only.
        if is_readonly:
            return

        with database.UseThenDisconnect(app.config):
            registry_model.set_torrent_info(derived_image.blob,
                                            app.config['BITTORRENT_PIECE_SIZE'],
                                            hasher.final_piece_hashes())
            registry_model.set_derived_image_size(derived_image, hasher.hashed_bytes)

    # Create a queue process to generate the data. The queue files will read from the process
    # and send the results to the client and storage.
    if derived_image is not None:
        unique_id = derived_image.unique_id
    else:
        # hashlib requires bytes on Python 3; encoding is a no-op for this ASCII value on
        # Python 2.
        random_seed = '%s:%s' % (verb, uuid.uuid4())
        unique_id = hashlib.sha256(random_seed.encode('utf-8')).hexdigest()

    handlers = [hasher.update]
    reporter = VerbReporter(verb)
    args = (formatter, tag, schema1_manifest, unique_id, handlers, reporter)
    queue_process = QueueProcess(
        _open_stream,
        8 * 1024,
        10 * 1024 * 1024,  # 8K/10M chunk/max
        args,
        finished=_store_metadata_and_cleanup)

    client_queue_file = QueueFile(queue_process.create_queue(), 'client')

    if not is_readonly:
        storage_queue_file = QueueFile(queue_process.create_queue(), 'storage')

        # If signing is required, add a QueueFile for signing the image as we stream it out.
        signing_queue_file = None
        if sign and signer.name:
            signing_queue_file = QueueFile(queue_process.create_queue(), 'signing')

    # Start building.
    queue_process.run()

    # Start the storage saving.
    if not is_readonly:
        storage_args = (verb, derived_image, storage_queue_file)
        QueueProcess.run_process(_write_derived_image_to_storage, storage_args,
                                 finished=_cleanup)

        if sign and signer.name:
            signing_args = (verb, derived_image, signing_queue_file)
            QueueProcess.run_process(_sign_derived_image, signing_args, finished=_cleanup)

    # Close the database handle here for this process before we send the long download.
    database.close_db_filter(None)

    # Return the client's data.
    return send_file(client_queue_file, mimetype=LAYER_MIMETYPE)


def os_arch_checker(os, arch):
    """
    Returns a checker callable `(tag, manifest) -> bool` which is True only when the manifest's
    leaf layer metadata matches the given operating system and architecture.
    """

    def checker(tag, manifest):
        try:
            image_json = json.loads(manifest.leaf_layer.raw_v1_metadata)
        except (ValueError, TypeError):
            logger.exception('Could not parse leaf layer JSON for manifest %s', manifest)
            return False

        # Verify the architecture and os.
        operating_system = image_json.get('os', 'linux')
        if operating_system != os:
            return False

        architecture = image_json.get('architecture', 'amd64')

        # Note: Some older Docker images have 'x86_64' rather than 'amd64'.
        # We allow the conversion here.
        if architecture == 'x86_64' and operating_system == 'linux':
            architecture = 'amd64'

        return architecture == arch

    return checker


@route_show_if(features.ACI_CONVERSION)
@anon_protect
@verbs.route('/aci/<server>/<namespace>/<repository>/<tag>/sig/<os>/<arch>/', methods=['GET'])
@verbs.route('/aci/<server>/<namespace>/<repository>/<tag>/aci.asc/<os>/<arch>/',
             methods=['GET'])
@process_auth
def get_aci_signature(server, namespace, repository, tag, os, arch):
    """ Returns the signature for the ACI conversion of the given tag, os and architecture. """
    return _repo_verb_signature(namespace, repository, tag, 'aci',
                                checker=os_arch_checker(os, arch), os=os, arch=arch)


@route_show_if(features.ACI_CONVERSION)
@anon_protect
@verbs.route('/aci/<server>/<namespace>/<repository>/<tag>/aci/<os>/<arch>/',
             methods=['GET', 'HEAD'])
@process_auth
def get_aci_image(server, namespace, repository, tag, os, arch):
    """ Returns the (signed) ACI conversion of the given tag, os and architecture. """
    return _repo_verb(namespace, repository, tag, 'aci', AppCImageFormatter(), sign=True,
                      checker=os_arch_checker(os, arch), os=os, arch=arch)


@anon_protect
@verbs.route('/squash/<namespace>/<repository>/<tag>', methods=['GET'])
@process_auth
def get_squashed_tag(namespace, repository, tag):
    """ Returns the squashed Docker image for the given tag. """
    return _repo_verb(namespace, repository, tag, 'squash', SquashedDockerImageFormatter())


@route_show_if(features.BITTORRENT)
@anon_protect
@verbs.route('/torrent{0}'.format(BLOB_DIGEST_ROUTE), methods=['GET'])
@process_auth
@parse_repository_name()
@check_region_blacklisted(namespace_name_kwarg='namespace_name')
def get_tag_torrent(namespace_name, repo_name, digest):
    """ Returns the torrent file for the blob with the given digest in the repository. """
    repo = model.repository.get_repository(namespace_name, repo_name)
    repo_is_public = repo is not None and model.repository.is_repository_public(repo)

    permission = ReadRepositoryPermission(namespace_name, repo_name)
    if not permission.can() and not repo_is_public:
        abort(403)

    user = get_authenticated_user()
    if user is None and not repo_is_public:
        # We can not generate a private torrent cluster without a user uuid (e.g. token auth)
        abort(403)

    if repo is not None and repo.kind.name != 'image':
        abort(405)

    repo_ref = registry_model.lookup_repository(namespace_name, repo_name)
    if repo_ref is None:
        abort(404)

    blob = registry_model.get_repo_blob_by_digest(repo_ref, digest, include_placements=True)
    if blob is None:
        abort(404)

    metric_queue.repository_pull.Inc(labelvalues=[namespace_name, repo_name, 'torrent', True])
    return _torrent_for_blob(blob, repo_is_public)


@verbs.route('/_internal_ping')
@anon_allowed
def internal_ping():
    """ Liveness endpoint: always responds with 'true'. """
    return make_response('true', 200)