import logging
import hashlib

from flask import redirect, Blueprint, abort, send_file, make_response, request

import features

from app import app, signer, storage, metric_queue, license_validator
from auth.auth_context import get_authenticated_user
from auth.permissions import ReadRepositoryPermission
from auth.process import process_auth
from data import database
from data.interfaces.verbs import pre_oci_model as model
from endpoints.common import route_show_if, parse_repository_name
from endpoints.decorators import anon_protect
from endpoints.trackhelper import track_and_log
from endpoints.v2.blob import BLOB_DIGEST_ROUTE
from image.appc import AppCImageFormatter
from image.docker.squashed import SquashedDockerImageFormatter
from storage import Storage
from util.registry.filelike import wrap_with_handler
from util.registry.queuefile import QueueFile
from util.registry.queueprocess import QueueProcess
from util.registry.torrent import (make_torrent, per_user_torrent_filename,
                                   public_torrent_filename, PieceHasher)


logger = logging.getLogger(__name__)
verbs = Blueprint('verbs', __name__)
license_validator.enforce_license_before_request(verbs)


def _open_stream(formatter, namespace, repository, tag, derived_image_id, repo_image, handlers):
    """
    This method generates a stream of data which will be replicated and read from the queue files.
    This method runs in a separate process.

    Each callable in `handlers` is wrapped around the stream via `wrap_with_handler`. The return
    value is the bound `read` method of the final (wrapped) stream, not the stream itself.
    """
    # For performance reasons, we load the full image list here, cache it, then disconnect from
    # the database.
    with database.UseThenDisconnect(app.config):
        image_list = list(model.get_manifest_layers_with_blobs(repo_image))

    def get_next_image():
        for current_image in image_list:
            yield current_image

    def get_next_layer():
        # Re-Initialize the storage engine because some may not respond well to forking (e.g. S3)
        store = Storage(app, metric_queue)
        for current_image in image_list:
            current_image_path = model.get_blob_path(current_image.blob)
            current_image_stream = store.stream_read_file(current_image.blob.locations,
                                                          current_image_path)

            logger.debug('Returning image layer %s: %s', current_image.image_id,
                         current_image_path)
            yield current_image_stream

    stream = formatter.build_stream(namespace, repository, tag, repo_image, derived_image_id,
                                    get_next_image, get_next_layer)

    for handler_fn in handlers:
        stream = wrap_with_handler(stream, handler_fn)

    return stream.read


def _sign_derived_image(verb, derived_image, queue_file):
    """
    Read from the queue file and sign the contents which are generated. This method runs in a
    separate process.

    If signing raises, the error is logged and nothing is stored. The signature is only stored
    when the queue file did not record an exception.
    """
    signature = None
    try:
        signature = signer.detached_sign(queue_file)
    except Exception:
        # Catch Exception rather than using a bare except, so that SystemExit and
        # KeyboardInterrupt are not swallowed.
        logger.exception('Exception when signing %s deriving image %s', verb, derived_image.ref)
        return

    # Setup the database (since this is a new process) and then disconnect immediately
    # once the operation completes.
    if not queue_file.raised_exception:
        with database.UseThenDisconnect(app.config):
            model.set_derived_image_signature(derived_image, signer.name, signature)


def _write_derived_image_to_storage(verb, derived_image, queue_file):
    """
    Read from the generated stream and write it back to the storage engine. This method runs in a
    separate process.

    An exception handler is registered on the queue file which deletes the derived image.
    """
    def handle_exception(ex):
        logger.debug('Exception when building %s derived image %s: %s', verb, derived_image.ref,
                     ex)

        with database.UseThenDisconnect(app.config):
            model.delete_derived_image(derived_image)

    queue_file.add_exception_handler(handle_exception)

    # Re-Initialize the storage engine because some may not respond well to forking (e.g. S3)
    store = Storage(app, metric_queue)
    image_path = model.get_blob_path(derived_image.blob)
    store.stream_write(derived_image.blob.locations, image_path, queue_file)
    queue_file.close()


def _torrent_for_blob(blob, is_public):
    """
    Returns a response containing the torrent file contents for the given blob. May abort
    with an error if the state is not valid (e.g. non-public, non-user request).

    Aborts with 404 if the blob has no size or no torrent info, with 501 if the storage engine
    cannot produce a direct download URL, and with 403 if the blob is not public and there is no
    authenticated user.
    """
    # Make sure the storage has a size.
    if not blob.size:
        abort(404)

    # Lookup the torrent information for the storage.
    torrent_info = model.get_torrent_info(blob)
    if torrent_info is None:
        abort(404)

    # Lookup the webseed path for the storage.
    path = model.get_blob_path(blob)
    webseed = storage.get_direct_download_url(blob.locations, path,
                                              expires_in=app.config['BITTORRENT_WEBSEED_LIFETIME'])
    if webseed is None:
        # We cannot support webseeds for storages that cannot provide direct downloads.
        abort(make_response('Storage engine does not support seeding.', 501))

    # Build the filename for the torrent.
    if is_public:
        name = public_torrent_filename(blob.uuid)
    else:
        user = get_authenticated_user()
        if not user:
            abort(403)

        name = per_user_torrent_filename(user.uuid, blob.uuid)

    # Return the torrent file.
    torrent_file = make_torrent(name, webseed, blob.size, torrent_info.piece_length,
                                torrent_info.pieces)

    headers = {'Content-Type': 'application/x-bittorrent',
               'Content-Disposition': 'attachment; filename={0}.torrent'.format(name)}

    return make_response(torrent_file, 200, headers)


def _torrent_repo_verb(repo_image, tag, verb, **kwargs):
    """
    Handles returning a torrent for the given verb on the given image and tag.

    Aborts with 406 if the torrent feature is disabled or no derived image exists yet. Any extra
    keyword arguments are forwarded to `track_and_log`.
    """
    if not features.BITTORRENT:
        # Torrent feature is not enabled.
        abort(406)

    # Lookup an *existing* derived storage for the verb. If the verb's image storage doesn't exist,
    # we cannot create it here, so we 406.
    derived_image = model.lookup_derived_image(repo_image, verb, varying_metadata={'tag': tag})
    if derived_image is None:
        abort(406)

    # Return the torrent.
    public_repo = model.repository_is_public(repo_image.repository.namespace_name,
                                             repo_image.repository.name)
    torrent = _torrent_for_blob(derived_image.blob, public_repo)

    # Log the action.
    track_and_log('repo_verb', repo_image.repository, tag=tag, verb=verb, torrent=True, **kwargs)
    return torrent


def _verify_repo_verb(_, namespace, repository, tag, verb, checker=None):
    """
    Verifies read access to the repository and returns the image for the given tag.

    The first positional argument is unused. Aborts with 403 if the caller lacks read permission
    and the repository is not public, and with 404 if the tag has no image or the optional
    `checker` callable returns a falsy value for the tag image.
    """
    permission = ReadRepositoryPermission(namespace, repository)
    if not permission.can() and not model.repository_is_public(namespace, repository):
        abort(403)

    # Lookup the requested tag.
    tag_image = model.get_tag_image(namespace, repository, tag)
    if tag_image is None:
        abort(404)

    # If there is a data checker, call it first.
    if checker is not None:
        if not checker(tag_image):
            logger.debug('Check mismatch on %s/%s:%s, verb %s', namespace, repository, tag, verb)
            abort(404)

    return tag_image


def _repo_verb_signature(namespace, repository, tag, verb, checker=None, **kwargs):
    """
    Returns a response containing the stored signature of the derived image for the given verb.

    Responds with an empty 202 if the derived image does not exist or its blob is still
    uploading. Aborts with 404 if no signer is configured or no signature is stored. Extra
    keyword arguments are accepted but not used here.
    """
    # Verify that the image exists and that we have access to it.
    repo_image = _verify_repo_verb(storage, namespace, repository, tag, verb, checker)

    # Lookup the derived image storage for the verb.
    derived_image = model.lookup_derived_image(repo_image, verb, varying_metadata={'tag': tag})
    if derived_image is None or derived_image.blob.uploading:
        return make_response('', 202)

    # Check if we have a valid signer configured.
    if not signer.name:
        abort(404)

    # Lookup the signature for the verb.
    signature_value = model.get_derived_image_signature(derived_image, signer.name)
    if signature_value is None:
        abort(404)

    # Return the signature.
    return make_response(signature_value)


def _repo_verb(namespace, repository, tag, verb, formatter, sign=False, checker=None, **kwargs):
    """
    Serves the derived image for the given verb on the given tag, building it if necessary.

    - If the request's best accepted mimetype is `application/x-bittorrent`, a torrent for an
      already-existing derived image is returned instead.
    - If the derived image's blob is not marked as uploading, the client is redirected to a
      direct download URL when the storage provides one; otherwise the blob is streamed.
    - Otherwise the image is generated in a queue process. Its output is fanned out to the
      client, to storage and (when `sign` is set and a signer is configured) to the signer.

    Extra keyword arguments are forwarded to `track_and_log`.
    """
    # Verify that the image exists and that we have access to it.
    repo_image = _verify_repo_verb(storage, namespace, repository, tag, verb, checker)

    # Check for torrent. If found, we return a torrent for the repo verb image (if the derived
    # image already exists).
    if request.accept_mimetypes.best == 'application/x-bittorrent':
        metric_queue.repository_pull.Inc(labelvalues=[namespace, repository, verb + '+torrent'])
        return _torrent_repo_verb(repo_image, tag, verb, **kwargs)

    # Log the action.
    track_and_log('repo_verb', repo_image.repository, tag=tag, verb=verb, **kwargs)
    metric_queue.repository_pull.Inc(labelvalues=[namespace, repository, verb])

    # Lookup/create the derived image for the verb and repo image.
    derived_image = model.lookup_or_create_derived_image(repo_image, verb,
                                                         storage.preferred_locations[0],
                                                         varying_metadata={'tag': tag})
    if not derived_image.blob.uploading:
        logger.debug('Derived %s image %s exists in storage', verb, derived_image.ref)
        derived_layer_path = model.get_blob_path(derived_image.blob)
        is_head_request = request.method == 'HEAD'
        download_url = storage.get_direct_download_url(derived_image.blob.locations,
                                                       derived_layer_path,
                                                       head=is_head_request)
        if download_url:
            logger.debug('Redirecting to download URL for derived %s image %s', verb,
                         derived_image.ref)
            return redirect(download_url)

        # Close the database handle here for this process before we send the long download.
        database.close_db_filter(None)

        logger.debug('Sending cached derived %s image %s', verb, derived_image.ref)
        return send_file(storage.stream_read_file(derived_image.blob.locations,
                                                  derived_layer_path))

    logger.debug('Building and returning derived %s image %s', verb, derived_image.ref)

    # Calculate a derived image ID.
    derived_image_id = hashlib.sha256(repo_image.image_id + ':' + verb).hexdigest()

    def _cleanup():
        # Close any existing DB connection once the process has exited.
        database.close_db_filter(None)

    hasher = PieceHasher(app.config['BITTORRENT_PIECE_SIZE'])

    def _store_metadata_and_cleanup():
        with database.UseThenDisconnect(app.config):
            model.set_torrent_info(derived_image.blob, app.config['BITTORRENT_PIECE_SIZE'],
                                   hasher.final_piece_hashes())
            model.set_blob_size(derived_image.blob, hasher.hashed_bytes)

    # Create a queue process to generate the data. The queue files will read from the process
    # and send the results to the client and storage.
    handlers = [hasher.update]
    args = (formatter, namespace, repository, tag, derived_image_id, repo_image, handlers)
    queue_process = QueueProcess(_open_stream,
                                 8 * 1024, 10 * 1024 * 1024,  # 8K/10M chunk/max
                                 args, finished=_store_metadata_and_cleanup)

    client_queue_file = QueueFile(queue_process.create_queue(), 'client')
    storage_queue_file = QueueFile(queue_process.create_queue(), 'storage')

    # If signing is required, add a QueueFile for signing the image as we stream it out.
    signing_queue_file = None
    if sign and signer.name:
        signing_queue_file = QueueFile(queue_process.create_queue(), 'signing')

    # Start building.
    queue_process.run()

    # Start the storage saving.
    storage_args = (verb, derived_image, storage_queue_file)
    QueueProcess.run_process(_write_derived_image_to_storage, storage_args, finished=_cleanup)

    if sign and signer.name:
        signing_args = (verb, derived_image, signing_queue_file)
        QueueProcess.run_process(_sign_derived_image, signing_args, finished=_cleanup)

    # Close the database handle here for this process before we send the long download.
    database.close_db_filter(None)

    # Return the client's data.
    return send_file(client_queue_file)


def os_arch_checker(os, arch):
    """
    Returns a checker callable which accepts a repo image and reports whether its metadata
    matches the given operating system and architecture.

    Missing metadata defaults to 'linux' and 'amd64' respectively.
    """
    def checker(repo_image):
        image_json = repo_image.compat_metadata

        # Verify the architecture and os.
        operating_system = image_json.get('os', 'linux')
        if operating_system != os:
            return False

        architecture = image_json.get('architecture', 'amd64')

        # Note: Some older Docker images have 'x86_64' rather than 'amd64'.
        # We allow the conversion here.
        if architecture == 'x86_64' and operating_system == 'linux':
            architecture = 'amd64'

        if architecture != arch:
            return False

        return True

    return checker


@route_show_if(features.ACI_CONVERSION)
@anon_protect
@verbs.route('/aci/<server>/<namespace>/<repository>/<tag>/sig/<os>/<arch>/', methods=['GET'])
@verbs.route('/aci/<server>/<namespace>/<repository>/<tag>/aci.asc/<os>/<arch>/', methods=['GET'])
@process_auth
def get_aci_signature(server, namespace, repository, tag, os, arch):
    """ Returns the signature for the ACI image of the given tag, os and architecture. """
    # `server` is part of the URL but is not used when looking up the signature.
    return _repo_verb_signature(namespace, repository, tag, 'aci',
                                checker=os_arch_checker(os, arch), os=os, arch=arch)


@route_show_if(features.ACI_CONVERSION)
@anon_protect
@verbs.route('/aci/<server>/<namespace>/<repository>/<tag>/aci/<os>/<arch>/',
             methods=['GET', 'HEAD'])
@process_auth
def get_aci_image(server, namespace, repository, tag, os, arch):
    """ Returns the (signed) ACI image for the given tag, os and architecture. """
    # `server` is part of the URL but is not used when building the image.
    return _repo_verb(namespace, repository, tag, 'aci', AppCImageFormatter(), sign=True,
                      checker=os_arch_checker(os, arch), os=os, arch=arch)


@anon_protect
@verbs.route('/squash/<namespace>/<repository>/<tag>', methods=['GET'])
@process_auth
def get_squashed_tag(namespace, repository, tag):
    """ Returns the squashed Docker image for the given tag. """
    return _repo_verb(namespace, repository, tag, 'squash', SquashedDockerImageFormatter())


@route_show_if(features.BITTORRENT)
@anon_protect
@verbs.route('/torrent{0}'.format(BLOB_DIGEST_ROUTE), methods=['GET'])
@process_auth
@parse_repository_name()
def get_tag_torrent(namespace_name, repo_name, digest):
    """
    Returns the torrent file for the blob with the given digest in the given repository.

    Aborts with 403 if the caller cannot read a non-public repository or if the repository is
    not public and there is no authenticated user, and with 404 if the blob is not found.
    """
    permission = ReadRepositoryPermission(namespace_name, repo_name)
    public_repo = model.repository_is_public(namespace_name, repo_name)
    if not permission.can() and not public_repo:
        abort(403)

    user = get_authenticated_user()
    if user is None and not public_repo:
        # We can not generate a private torrent cluster without a user uuid (e.g. token auth)
        abort(403)

    blob = model.get_repo_blob_by_digest(namespace_name, repo_name, digest)
    if blob is None:
        abort(404)

    metric_queue.repository_pull.Inc(labelvalues=[namespace_name, repo_name, 'torrent'])
    return _torrent_for_blob(blob, public_repo)