This repository has been archived on 2020-03-24. You can view files and clone it, but cannot push or open issues or pull requests.
quay/endpoints/registry.py

396 lines
14 KiB
Python
Raw Normal View History

2013-09-25 21:50:03 +00:00
import logging
import json
2013-09-25 21:50:03 +00:00
from flask import make_response, request, session, Response, abort, redirect
2013-09-25 21:50:03 +00:00
from functools import wraps
from datetime import datetime
from time import time
from data.queue import image_diff_queue
2013-09-25 21:50:03 +00:00
from app import app
from auth.auth import process_auth, extract_namespace_repo_from_session
from util import checksums, changes
from auth.permissions import (ReadRepositoryPermission,
ModifyRepositoryPermission)
from data import model
2013-09-25 21:50:03 +00:00
store = app.config['STORAGE']
2013-09-25 21:50:03 +00:00
logger = logging.getLogger(__name__)
class SocketReader(object):
  """File-like reader that tees every chunk it reads to registered handlers.

  Used to checksum / spool an upload stream while it is being written to
  storage, without reading the stream twice.
  """

  def __init__(self, fp):
    self._fp = fp
    self.handlers = []

  def add_handler(self, handler):
    """Register a callable that receives each chunk as it is read."""
    self.handlers.append(handler)

  def read(self, n=-1):
    """Read up to ``n`` bytes, notifying every handler before returning."""
    chunk = self._fp.read(n)
    if not chunk:
      # End of stream: handlers are not notified for empty reads.
      return ''
    for notify in self.handlers:
      notify(chunk)
    return chunk
2013-09-25 21:50:03 +00:00
def require_completion(f):
  """Decorator rejecting requests for images whose push has not finished.

  A mark file in storage means the image is still being uploaded; in that
  case the request is refused with a 400.
  """
  @wraps(f)
  def wrapper(namespace, repository, *args, **kwargs):
    mark = store.image_mark_path(namespace, repository, kwargs['image_id'])
    if store.exists(mark):
      logger.warning('Image is already being uploaded: %s', kwargs['image_id'])
      abort(400)  # 'Image is being uploaded, retry later')
    return f(namespace, repository, *args, **kwargs)
  return wrapper
def set_cache_headers(f):
  """Decorator attaching long-lived HTTP caching headers to a view.

  Adds Cache-Control/Expires/Last-Modified headers, short-circuits with a
  304 when the client sent If-Modified-Since, and marks the session
  unmodified so the Set-Cookie header is suppressed on cacheable responses.
  """
  @wraps(f)
  def wrapper(*args, **kwargs):
    # Set TTL to 1 year by default
    ttl = 31536000
    # Use UTC here: the header string is suffixed with 'GMT', so formatting
    # a *local* datetime (as fromtimestamp would) emits a wrong expiry on
    # any server not running in UTC.
    expires = datetime.utcfromtimestamp(int(time()) + ttl)
    expires = expires.strftime('%a, %d %b %Y %H:%M:%S GMT')
    headers = {
      'Cache-Control': 'public, max-age={0}'.format(ttl),
      'Expires': expires,
      'Last-Modified': 'Thu, 01 Jan 1970 00:00:00 GMT',
    }
    if 'If-Modified-Since' in request.headers:
      response = make_response('Not modified', 304)
      response.headers.extend(headers)
      return response
    kwargs['headers'] = headers
    # Prevent the Cookie to be sent when the object is cacheable
    session.modified = False
    return f(*args, **kwargs)
  return wrapper
@app.route('/v1/images/<image_id>/layer', methods=['GET'])
@process_auth
@extract_namespace_repo_from_session
@require_completion
@set_cache_headers
def get_image_layer(namespace, repository, image_id, headers):
  """Serve an image layer, preferring a direct storage redirect when the
  backend supports it; otherwise stream the bytes through this process."""
  permission = ReadRepositoryPermission(namespace, repository)
  if not (permission.can() or
          model.repository_is_public(namespace, repository)):
    abort(403)

  path = store.image_layer_path(namespace, repository, image_id)
  direct_download_url = store.get_direct_download_url(path)
  if direct_download_url:
    # Let the client fetch the bytes straight from storage.
    return redirect(direct_download_url)

  try:
    return Response(store.stream_read(path), headers=headers)
  except IOError:
    logger.warning('Image not found: %s', image_id)
    abort(404)  # 'Image not found', 404)
2013-09-25 21:50:03 +00:00
@app.route('/v1/images/<image_id>/layer', methods=['PUT'])
@process_auth
@extract_namespace_repo_from_session
def put_image_layer(namespace, repository, image_id):
  """Receive and store an image layer.

  Streams the request body into storage while computing checksums (simple
  sha256 and tarsum) on the fly. If a checksum was already stored for the
  image, verify it and finalize the image (remove the mark, queue a diffs
  job); otherwise stash the computed checksums in the session for the
  follow-up checksum request.
  """
  permission = ModifyRepositoryPermission(namespace, repository)
  if not permission.can():
    abort(403)

  try:
    json_data = store.get_content(store.image_json_path(namespace, repository,
                                                        image_id))
  except IOError:
    abort(404)  # 'Image not found', 404)

  layer_path = store.image_layer_path(namespace, repository, image_id)
  mark_path = store.image_mark_path(namespace, repository, image_id)

  # A layer without a mark is already complete; refuse to overwrite it.
  if store.exists(layer_path) and not store.exists(mark_path):
    abort(409)  # 'Image already exists', 409)

  input_stream = request.stream
  if request.headers.get('transfer-encoding') == 'chunked':
    # Careful, might work only with WSGI servers supporting chunked
    # encoding (Gunicorn)
    input_stream = request.environ['wsgi.input']

  # compute checksums
  csums = []
  sr = SocketReader(input_stream)
  tmp, store_hndlr = store.temp_store_handler()
  sr.add_handler(store_hndlr)
  h, sum_hndlr = checksums.simple_checksum_handler(json_data)
  sr.add_handler(sum_hndlr)
  store.stream_write(layer_path, sr)
  csums.append('sha256:{0}'.format(h.hexdigest()))

  try:
    image_size = tmp.tell()

    # Save the size of the image.
    model.set_image_size(image_id, namespace, repository, image_size)

    tmp.seek(0)
    csums.append(checksums.compute_tarsum(tmp, json_data))
  except (IOError, checksums.TarError) as e:
    logger.debug('put_image_layer: Error when computing tarsum '
                 '{0}'.format(e))
  finally:
    # Always release the spooled temp file — the previous code closed it
    # only on the success path and leaked it when tarsum computation failed.
    tmp.close()

  try:
    checksum = store.get_content(store.image_checksum_path(namespace,
                                                           repository,
                                                           image_id))
  except IOError:
    # We don't have a checksum stored yet, that's fine skipping the check.
    # Not removing the mark though, image is not downloadable yet.
    session['checksum'] = csums
    return make_response('true', 200)

  # We check if the checksums provided matches one the one we computed
  if checksum not in csums:
    logger.warning('put_image_layer: Wrong checksum')
    abort(400)  # 'Checksum mismatch, ignoring the layer')

  # Checksum is ok, we remove the marker
  store.remove(mark_path)

  # The layer is ready for download, send a job to the work queue to
  # process it.
  logger.debug('Queing diffs job for image: %s' % image_id)
  image_diff_queue.put(json.dumps({
      'namespace': namespace,
      'repository': repository,
      'image_id': image_id,
  }))

  return make_response('true', 200)
@app.route('/v1/images/<image_id>/checksum', methods=['PUT'])
@process_auth
@extract_namespace_repo_from_session
def put_image_checksum(namespace, repository, image_id):
  """Record the client-supplied checksum for an uploaded image.

  Validates the checksum against those computed during the layer upload
  (stored in the session); on match, removes the upload mark so the image
  becomes downloadable and queues a diffs job.
  """
  permission = ModifyRepositoryPermission(namespace, repository)
  if not permission.can():
    abort(403)

  checksum = request.headers.get('X-Docker-Checksum')
  if not checksum:
    logger.warning('Missing Image\'s checksum: %s', image_id)
    abort(400)  # 'Missing Image\'s checksum')

  if not session.get('checksum'):
    logger.warning('Checksum not found in Cookie for image: %s', image_id)
    abort(400)  # 'Checksum not found in Cookie')

  if not store.exists(store.image_json_path(namespace, repository, image_id)):
    abort(404)  # 'Image not found', 404)

  mark_path = store.image_mark_path(namespace, repository, image_id)
  if not store.exists(mark_path):
    abort(409)  # 'Cannot set this image checksum', 409)

  err = store_checksum(namespace, repository, image_id, checksum)
  if err:
    # store_checksum returns an error *message* string, but Flask's abort()
    # requires an int HTTP status code — passing the string through raised a
    # LookupError (500). Log the message and respond 400 instead.
    logger.warning(err)
    abort(400)

  if checksum not in session.get('checksum', []):
    logger.debug('session checksums: %s' % session.get('checksum', []))
    logger.debug('client supplied checksum: %s' % checksum)
    logger.debug('put_image_layer: Wrong checksum')
    abort(400)  # 'Checksum mismatch')

  # Checksum is ok, we remove the marker
  store.remove(mark_path)

  # The layer is ready for download, send a job to the work queue to
  # process it.
  logger.debug('Queing diffs job for image: %s' % image_id)
  image_diff_queue.put(json.dumps({
      'namespace': namespace,
      'repository': repository,
      'image_id': image_id,
  }))

  return make_response('true', 200)
@app.route('/v1/images/<image_id>/json', methods=['GET'])
@process_auth
@extract_namespace_repo_from_session
@require_completion
@set_cache_headers
def get_image_json(namespace, repository, image_id, headers):
  """Return the stored JSON metadata for an image, adding layer size and
  checksum headers when those are available in storage."""
  permission = ReadRepositoryPermission(namespace, repository)
  if not (permission.can() or
          model.repository_is_public(namespace, repository)):
    abort(403)

  try:
    data = store.get_content(store.image_json_path(namespace, repository,
                                                   image_id))
  except IOError:
    abort(404)  # 'Image not found', 404)

  try:
    layer_size = store.get_size(store.image_layer_path(namespace, repository,
                                                       image_id))
    headers['X-Docker-Size'] = str(layer_size)
  except OSError:
    # Size is best-effort; the layer may not be in place yet.
    pass

  checksum_path = store.image_checksum_path(namespace, repository, image_id)
  if store.exists(checksum_path):
    headers['X-Docker-Checksum'] = store.get_content(checksum_path)

  response = make_response(data, 200)
  response.headers.extend(headers)
  return response
@app.route('/v1/images/<image_id>/ancestry', methods=['GET'])
@process_auth
@extract_namespace_repo_from_session
@require_completion
@set_cache_headers
def get_image_ancestry(namespace, repository, image_id, headers):
  """Return the ancestry document (image id plus all ancestor ids) as JSON."""
  permission = ReadRepositoryPermission(namespace, repository)
  if not (permission.can() or
          model.repository_is_public(namespace, repository)):
    abort(403)

  try:
    raw = store.get_content(store.image_ancestry_path(namespace, repository,
                                                      image_id))
  except IOError:
    abort(404)  # 'Image not found', 404)

  # Round-trip through json to emit a normalized document.
  response = make_response(json.dumps(json.loads(raw)), 200)
  response.headers.extend(headers)
  return response
def generate_ancestry(namespace, repository, image_id, parent_id=None):
  """Write the ancestry list for an image: its own id followed by every
  ancestor id, derived from the parent's stored ancestry."""
  ancestry_path = store.image_ancestry_path(namespace, repository, image_id)

  if not parent_id:
    # Root image: the ancestry is just the image itself.
    store.put_content(ancestry_path, json.dumps([image_id]))
    return

  parent_ancestry = json.loads(store.get_content(
      store.image_ancestry_path(namespace, repository, parent_id)))
  store.put_content(ancestry_path, json.dumps([image_id] + parent_ancestry))
2013-09-25 21:50:03 +00:00
def store_checksum(namespace, repository, image_id, checksum):
  """Persist an image checksum in storage.

  Returns an error message string when the checksum is not of the form
  'algo:hexdigest', otherwise returns None after storing it.
  """
  if len(checksum.split(':')) != 2:
    return 'Invalid checksum format'

  # We store the checksum
  store.put_content(
      store.image_checksum_path(namespace, repository, image_id), checksum)
@app.route('/v1/images/<image_id>/json', methods=['PUT'])
@process_auth
@extract_namespace_repo_from_session
def put_image_json(namespace, repository, image_id):
  """Store the JSON metadata for an image being pushed.

  Validates the document (must be a dict whose 'id' matches the URL, with an
  existing parent if one is referenced), records the optional checksum,
  saves the metadata in the database and storage, sets the upload mark, and
  writes the ancestry document.
  """
  permission = ModifyRepositoryPermission(namespace, repository)
  if not permission.can():
    abort(403)

  # Initialize data so a decode failure falls through to the 400 below
  # instead of raising NameError. Catch ValueError: JSONDecodeError is a
  # ValueError subclass, and older json modules don't expose it at all.
  data = None
  try:
    data = json.loads(request.data)
  except ValueError:
    pass

  if not data or not isinstance(data, dict):
    logger.warning('Invalid JSON for image: %s json: %s', image_id,
                   request.data)
    abort(400)  # 'Invalid JSON')

  if 'id' not in data:
    logger.warning('Missing key `id\' in JSON for image: %s', image_id)
    abort(400)  # 'Missing key `id\' in JSON')

  # Read the checksum
  checksum = request.headers.get('X-Docker-Checksum')
  if checksum:
    # Storing the checksum is optional at this stage
    err = store_checksum(namespace, repository, image_id, checksum)
    if err:
      # store_checksum returns an error *message* string; abort() needs an
      # int HTTP status code, so log it and report a 400 rather than passing
      # the string through (which crashed with a LookupError).
      logger.warning(err)
      abort(400)
  else:
    # We cleanup any old checksum in case it's a retry after a fail
    store.remove(store.image_checksum_path(namespace, repository, image_id))

  if image_id != data['id']:
    logger.warning('JSON data contains invalid id for image: %s', image_id)
    abort(400)  # 'JSON data contains invalid id')

  parent_id = data.get('parent')
  if parent_id and not store.exists(store.image_json_path(namespace,
                                                          repository,
                                                          data['parent'])):
    logger.warning('Image depends on a non existing parent image: %s',
                   image_id)
    abort(400)  # 'Image depends on a non existing parent')

  json_path = store.image_json_path(namespace, repository, image_id)
  mark_path = store.image_mark_path(namespace, repository, image_id)
  # Existing metadata without a mark means a completed image; don't clobber.
  if store.exists(json_path) and not store.exists(mark_path):
    abort(409)  # 'Image already exists', 409)

  # If we reach that point, it means that this is a new image or a retry
  # on a failed push
  # save the metadata
  parent_obj = None
  if parent_id:
    parent_obj = model.get_image_by_id(namespace, repository, parent_id)
  model.set_image_metadata(image_id, namespace, repository,
                           data.get('created'), data.get('comment'),
                           parent_obj)

  store.put_content(mark_path, 'true')
  store.put_content(json_path, request.data)
  generate_ancestry(namespace, repository, image_id, parent_id)
  return make_response('true', 200)
def process_image_changes(namespace, repository, image_id):
  """Compute the file diffs for an image layer and return the storage path
  of its collapsed filesystem trie, recursing through parent images first."""
  logger.debug('Generating diffs for image: %s' % image_id)

  diffs_path = store.image_file_diffs_path(namespace, repository, image_id)
  trie_path = store.image_file_trie_path(namespace, repository, image_id)

  if store.exists(diffs_path):
    logger.debug('Diffs already exist for image: %s' % image_id)
    return trie_path

  image = model.get_image_by_id(namespace, repository, image_id)
  parents = model.get_parent_images(image)

  # Ensure the direct parent's trie exists before collapsing onto it.
  parent_trie_path = None
  if parents:
    parent_trie_path = process_image_changes(namespace, repository,
                                             parents[-1].docker_image_id)

  # Start from the parent's collapsed filesystem state (or an empty one).
  parent_trie = changes.empty_fs()
  if parent_trie_path:
    parent_trie.frombytes(store.get_content(parent_trie_path))

  # Read the file entries out of the layer tar and fold them into the
  # parent state while the stream is still open.
  layer_path = store.image_layer_path(namespace, repository, image_id)
  removed_files = set()
  with store.stream_read_file(layer_path) as layer_tar_stream:
    layer_files = changes.files_and_dirs_from_tar(layer_tar_stream,
                                                  removed_files)
    new_metadata = changes.compute_new_diffs_and_fs(parent_trie, layer_files,
                                                    removed_files)
  (new_trie, added, changed, removed) = new_metadata

  # Persist the collapsed filesystem for child images to build on.
  store.put_content(trie_path, new_trie.tobytes())

  # Persist the sorted diff lists.
  diffs = {}
  for section, entries in zip(('added', 'changed', 'removed'),
                              new_metadata[1:]):
    diffs[section] = sorted(entries)
  store.put_content(diffs_path, json.dumps(diffs, indent=2))

  return trie_path