This repository has been archived on 2020-03-24. You can view files and clone it, but cannot push or open issues or pull requests.
quay/endpoints/registry.py
Joseph Schorr e7904f0a9e - Fix image size saving to occur before the return of the response
- Make the image size say ‘Unknown’ if it is not known
2014-01-03 16:42:38 -05:00

403 lines
15 KiB
Python

import logging
import json
from flask import make_response, request, session, Response, abort, redirect
from functools import wraps
from datetime import datetime
from time import time
from data.queue import image_diff_queue
from app import app
from auth.auth import process_auth, extract_namespace_repo_from_session
from util import checksums, changes
from auth.permissions import (ReadRepositoryPermission,
ModifyRepositoryPermission)
from data import model
# Storage backend taken from the application configuration; the handlers in
# this module read and write all image data (JSON, layers, checksums, marks)
# through this object.
store = app.config['STORAGE']
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
class SocketReader(object):
    """File-like reader that hands every chunk it reads to registered handlers.

    Wraps an object exposing ``read(n)`` so that data can be consumed once
    while several observers each see the same chunks.
    """

    def __init__(self, fp):
        """Wrap ``fp``, any object with a ``read(n)`` method."""
        self.handlers = []
        self._fp = fp

    def add_handler(self, handler):
        """Register ``handler``, a callable invoked with each chunk read."""
        self.handlers.append(handler)

    def read(self, n=-1):
        """Read up to ``n`` bytes from the wrapped stream.

        Each non-empty chunk is passed to every handler, in registration
        order, before being returned. When the underlying read yields nothing,
        ``''`` is returned and no handler is called.
        """
        chunk = self._fp.read(n)
        if chunk:
            for notify in self.handlers:
                notify(chunk)
            return chunk
        return ''
def require_completion(f):
    """Decorator refusing access to an image whose upload mark still exists.

    The wrapped view is called as ``f(namespace, repository, *args,
    **kwargs)`` and must receive ``image_id`` as a keyword argument. If the
    store reports that the image's mark path exists, the request is aborted
    with a 400 instead of reaching the view.
    """
    @wraps(f)
    def wrapper(namespace, repository, *args, **kwargs):
        image_id = kwargs['image_id']
        mark_path = store.image_mark_path(namespace, repository, image_id)
        if store.exists(mark_path):
            logger.warning('Image is already being uploaded: %s', image_id)
            abort(400)  # 'Image is being uploaded, retry later')
        return f(namespace, repository, *args, **kwargs)
    return wrapper
def set_cache_headers(f):
    """Decorator that makes a view's response cacheable for one year.

    The wrapped view receives the caching headers through an extra
    ``headers`` keyword argument and is responsible for attaching them to the
    response it builds. Requests that carry an ``If-Modified-Since`` header
    are answered directly with ``304 Not modified`` (plus the caching
    headers) and never reach the view.
    """
    # Imported here so the fix does not rely on a new module-level import.
    from email.utils import formatdate

    @wraps(f)
    def wrapper(*args, **kwargs):
        # Set TTL to 1 year by default
        ttl = 31536000
        # formatdate(usegmt=True) renders the timestamp as an HTTP date in
        # real GMT, independent of the server's time zone and locale.
        # Formatting a local-time datetime with a literal "GMT" suffix would
        # mislabel the expiry on any host not running in UTC.
        expires = formatdate(int(time()) + ttl, usegmt=True)
        headers = {
            'Cache-Control': 'public, max-age={0}'.format(ttl),
            'Expires': expires,
            'Last-Modified': 'Thu, 01 Jan 1970 00:00:00 GMT',
        }
        if 'If-Modified-Since' in request.headers:
            response = make_response('Not modified', 304)
            response.headers.extend(headers)
            return response
        kwargs['headers'] = headers
        # Prevent the Cookie to be sent when the object is cacheable
        session.modified = False
        return f(*args, **kwargs)
    return wrapper
@app.route('/v1/images/<image_id>/layer', methods=['GET'])
@process_auth
@extract_namespace_repo_from_session
@require_completion
@set_cache_headers
def get_image_layer(namespace, repository, image_id, headers):
    """Serve the layer data of ``image_id``.

    Access requires read permission on the repository or the repository
    being public; otherwise the request is aborted with 403. When the store
    offers a direct download URL the client is redirected to it, otherwise
    the layer is streamed from the store with the supplied caching
    ``headers``. An ``IOError`` while opening the stream results in a 404.
    """
    can_read = ReadRepositoryPermission(namespace, repository).can()
    if not (can_read or model.repository_is_public(namespace, repository)):
        abort(403)

    layer_path = store.image_layer_path(namespace, repository, image_id)
    redirect_url = store.get_direct_download_url(layer_path)
    if redirect_url:
        return redirect(redirect_url)

    try:
        return Response(store.stream_read(layer_path), headers=headers)
    except IOError:
        logger.warning('Image not found: %s', image_id)
        abort(404)  # 'Image not found', 404)
@app.route('/v1/images/<image_id>/layer', methods=['PUT'])
@process_auth
@extract_namespace_repo_from_session
def put_image_layer(namespace, repository, image_id):
    """Receive the layer data of ``image_id`` and store it.

    Aborts with 403 without modify permission on the repository, 404 when
    the image JSON is not in the store, 409 when the layer already exists
    and no upload mark is present, and 400 when a stored checksum matches
    none of the checksums computed from the upload.

    While the request body is written to the store it is also fed to a
    checksum handler and to a temporary file, from which the image size and
    the tarsum are derived. If no checksum has been stored yet, the computed
    checksums are kept in the session and the upload mark is left in place.
    Otherwise the mark is removed and a diff job is queued.
    """
    permission = ModifyRepositoryPermission(namespace, repository)
    if not permission.can():
        abort(403)
    try:
        json_data = store.get_content(store.image_json_path(namespace,
                                                            repository,
                                                            image_id))
    except IOError:
        abort(404)  # 'Image not found', 404)
    layer_path = store.image_layer_path(namespace, repository, image_id)
    mark_path = store.image_mark_path(namespace, repository, image_id)
    if store.exists(layer_path) and not store.exists(mark_path):
        abort(409)  # 'Image already exists', 409)
    input_stream = request.stream
    if request.headers.get('transfer-encoding') == 'chunked':
        # Careful, might work only with WSGI servers supporting chunked
        # encoding (Gunicorn)
        input_stream = request.environ['wsgi.input']
    # compute checksums
    csums = []
    sr = SocketReader(input_stream)
    tmp, store_hndlr = store.temp_store_handler()
    try:
        sr.add_handler(store_hndlr)
        h, sum_hndlr = checksums.simple_checksum_handler(json_data)
        sr.add_handler(sum_hndlr)
        store.stream_write(layer_path, sr)
        csums.append('sha256:{0}'.format(h.hexdigest()))
        try:
            image_size = tmp.tell()
            # Save the size of the image.
            model.set_image_size(image_id, namespace, repository, image_size)
            tmp.seek(0)
            csums.append(checksums.compute_tarsum(tmp, json_data))
        except (IOError, checksums.TarError) as e:
            # A failed tarsum is not fatal: the sha256 checksum computed
            # during the upload is still available for comparison.
            logger.debug('put_image_layer: Error when computing tarsum %s', e)
    finally:
        # Always release the temporary file, including when the upload or the
        # tarsum computation raised; previously it was only closed on success.
        tmp.close()
    try:
        checksum = store.get_content(store.image_checksum_path(namespace,
                                                               repository,
                                                               image_id))
    except IOError:
        # We don't have a checksum stored yet, that's fine skipping the check.
        # Not removing the mark though, image is not downloadable yet.
        session['checksum'] = csums
        return make_response('true', 200)
    # We check if the checksums provided matches one the one we computed
    if checksum not in csums:
        logger.warning('put_image_layer: Wrong checksum')
        abort(400)  # 'Checksum mismatch, ignoring the layer')
    # Checksum is ok, we remove the marker
    store.remove(mark_path)
    # The layer is ready for download, send a job to the work queue to
    # process it.
    logger.debug('Queing diffs job for image: %s', image_id)
    image_diff_queue.put(json.dumps({
        'namespace': namespace,
        'repository': repository,
        'image_id': image_id,
    }))
    return make_response('true', 200)
@app.route('/v1/images/<image_id>/checksum', methods=['PUT'])
@process_auth
@extract_namespace_repo_from_session
def put_image_checksum(namespace, repository, image_id):
    """Record the client-supplied checksum of ``image_id`` and finish the push.

    The checksum is read from the ``X-Docker-Checksum`` header and compared
    with the checksums saved in the session. Aborts with 403 without modify
    permission, 400 when the header or the session checksums are missing,
    when the checksum is malformed or when it matches none of the session
    checksums, 404 when the image JSON is not in the store, and 409 when the
    image has no upload mark. On success the mark is removed and a diff job
    is queued.
    """
    permission = ModifyRepositoryPermission(namespace, repository)
    if not permission.can():
        abort(403)
    checksum = request.headers.get('X-Docker-Checksum')
    if not checksum:
        logger.warning('Missing Image\'s checksum: %s', image_id)
        abort(400)  # 'Missing Image\'s checksum')
    if not session.get('checksum'):
        logger.warning('Checksum not found in Cookie for image: %s', image_id)
        abort(400)  # 'Checksum not found in Cookie')
    if not store.exists(store.image_json_path(namespace, repository,
                                              image_id)):
        abort(404)  # 'Image not found', 404)
    mark_path = store.image_mark_path(namespace, repository, image_id)
    if not store.exists(mark_path):
        abort(409)  # 'Cannot set this image checksum', 409)
    err = store_checksum(namespace, repository, image_id, checksum)
    if err:
        # store_checksum reports failures as a message string, which is not a
        # valid argument for abort(); answer with a proper 400 instead.
        logger.warning('Invalid checksum for image %s: %s', image_id, err)
        abort(400)  # 'Invalid checksum format')
    if checksum not in session.get('checksum', []):
        logger.debug('session checksums: %s', session.get('checksum', []))
        logger.debug('client supplied checksum: %s', checksum)
        logger.debug('put_image_checksum: Wrong checksum')
        abort(400)  # 'Checksum mismatch')
    # Checksum is ok, we remove the marker
    store.remove(mark_path)
    # The layer is ready for download, send a job to the work queue to
    # process it.
    logger.debug('Queing diffs job for image: %s', image_id)
    image_diff_queue.put(json.dumps({
        'namespace': namespace,
        'repository': repository,
        'image_id': image_id,
    }))
    return make_response('true', 200)
@app.route('/v1/images/<image_id>/json', methods=['GET'])
@process_auth
@extract_namespace_repo_from_session
@require_completion
@set_cache_headers
def get_image_json(namespace, repository, image_id, headers):
    """Return the stored JSON document of ``image_id``.

    Requires read permission or a public repository (403 otherwise) and
    answers 404 when the JSON cannot be read from the store. The response
    carries the supplied caching ``headers`` plus ``X-Docker-Size`` when the
    layer size can be determined and ``X-Docker-Checksum`` when a checksum is
    stored.
    """
    can_read = ReadRepositoryPermission(namespace, repository).can()
    if not (can_read or model.repository_is_public(namespace, repository)):
        abort(403)

    try:
        json_path = store.image_json_path(namespace, repository, image_id)
        payload = store.get_content(json_path)
    except IOError:
        abort(404)  # 'Image not found', 404)

    # The size header is best effort: it is simply omitted when the store
    # cannot report the layer size.
    try:
        layer_path = store.image_layer_path(namespace, repository, image_id)
        headers['X-Docker-Size'] = str(store.get_size(layer_path))
    except OSError:
        pass

    checksum_path = store.image_checksum_path(namespace, repository, image_id)
    if store.exists(checksum_path):
        headers['X-Docker-Checksum'] = store.get_content(checksum_path)

    response = make_response(payload, 200)
    response.headers.extend(headers)
    return response
@app.route('/v1/images/<image_id>/ancestry', methods=['GET'])
@process_auth
@extract_namespace_repo_from_session
@require_completion
@set_cache_headers
def get_image_ancestry(namespace, repository, image_id, headers):
    """Return the stored ancestry of ``image_id`` as JSON.

    Requires read permission or a public repository (403 otherwise) and
    answers 404 when the ancestry cannot be read from the store. The stored
    document is parsed and re-serialized before being sent with the supplied
    caching ``headers``.
    """
    can_read = ReadRepositoryPermission(namespace, repository).can()
    if not (can_read or model.repository_is_public(namespace, repository)):
        abort(403)

    try:
        ancestry_path = store.image_ancestry_path(namespace, repository,
                                                  image_id)
        raw_ancestry = store.get_content(ancestry_path)
    except IOError:
        abort(404)  # 'Image not found', 404)

    body = json.dumps(json.loads(raw_ancestry))
    response = make_response(body, 200)
    response.headers.extend(headers)
    return response
def generate_ancestry(namespace, repository, image_id, parent_id=None):
    """Write the ancestry list of ``image_id`` to the store.

    Without a parent the ancestry is just ``[image_id]``. With a parent, the
    parent's stored ancestry is loaded and ``image_id`` is placed in front of
    it. The result is saved as JSON under the image's ancestry path.
    """
    if parent_id:
        parent_path = store.image_ancestry_path(namespace, repository,
                                                parent_id)
        lineage = json.loads(store.get_content(parent_path))
        lineage.insert(0, image_id)
    else:
        lineage = [image_id]

    own_path = store.image_ancestry_path(namespace, repository, image_id)
    store.put_content(own_path, json.dumps(lineage))
def store_checksum(namespace, repository, image_id, checksum):
    """Save ``checksum`` for ``image_id`` in the store.

    The checksum must consist of exactly two ``:``-separated parts.

    Returns:
        ``'Invalid checksum format'`` when the checksum is malformed (nothing
        is stored in that case), otherwise ``None``.
    """
    # Exactly one ':' separator means exactly two parts.
    if checksum.count(':') != 1:
        return 'Invalid checksum format'
    # We store the checksum
    target_path = store.image_checksum_path(namespace, repository, image_id)
    store.put_content(target_path, checksum)
@app.route('/v1/images/<image_id>/json', methods=['PUT'])
@process_auth
@extract_namespace_repo_from_session
def put_image_json(namespace, repository, image_id):
    """Receive the JSON document of ``image_id`` and start its upload.

    Aborts with 403 without modify permission on the repository; 400 when
    the body is not a non-empty JSON object, lacks an ``id`` key, carries an
    ``id`` different from ``image_id``, names a parent whose JSON is not in
    the store, or comes with a malformed ``X-Docker-Checksum`` header; and
    409 when the image JSON already exists without an upload mark.

    On success the image metadata is saved, the upload mark and the JSON are
    written to the store, and the ancestry is generated.
    """
    permission = ModifyRepositoryPermission(namespace, repository)
    if not permission.can():
        abort(403)
    # Pre-bind so that an unparsable body reaches the 400 below instead of
    # failing with a NameError on an unbound variable.
    data = None
    try:
        data = json.loads(request.data)
    except ValueError:
        # Decoding errors are ValueError (or a subclass of it) on both
        # Python 2 and Python 3; the invalid body is rejected just below.
        pass
    if not data or not isinstance(data, dict):
        logger.warning('Invalid JSON for image: %s json: %s', image_id,
                       request.data)
        abort(400)  # 'Invalid JSON')
    if 'id' not in data:
        logger.warning('Missing key `id\' in JSON for image: %s', image_id)
        abort(400)  # 'Missing key `id\' in JSON')
    # Read the checksum
    checksum = request.headers.get('X-Docker-Checksum')
    if checksum:
        # Storing the checksum is optional at this stage
        err = store_checksum(namespace, repository, image_id, checksum)
        if err:
            # store_checksum reports failures as a message string, which is
            # not a valid argument for abort(); answer with a proper 400.
            logger.warning('Invalid checksum for image %s: %s', image_id, err)
            abort(400)  # 'Invalid checksum format')
    else:
        # We cleanup any old checksum in case it's a retry after a fail
        store.remove(store.image_checksum_path(namespace, repository,
                                               image_id))
    if image_id != data['id']:
        logger.warning('JSON data contains invalid id for image: %s', image_id)
        abort(400)  # 'JSON data contains invalid id')
    parent_id = data.get('parent')
    if parent_id and not store.exists(store.image_json_path(namespace,
                                                            repository,
                                                            parent_id)):
        logger.warning('Image depends on a non existing parent image: %s',
                       image_id)
        abort(400)  # 'Image depends on a non existing parent')
    json_path = store.image_json_path(namespace, repository, image_id)
    mark_path = store.image_mark_path(namespace, repository, image_id)
    if store.exists(json_path) and not store.exists(mark_path):
        abort(409)  # 'Image already exists', 409)
    # If we reach that point, it means that this is a new image or a retry
    # on a failed push
    # save the metadata
    if parent_id:
        parent_obj = model.get_image_by_id(namespace, repository, parent_id)
    else:
        parent_obj = None
    model.set_image_metadata(image_id, namespace, repository,
                             data.get('created'), data.get('comment'),
                             parent_obj)
    store.put_content(mark_path, 'true')
    store.put_content(json_path, request.data)
    generate_ancestry(namespace, repository, image_id, parent_id)
    return make_response('true', 200)
def delete_repository_storage(namespace, repository):
    """Remove the repository's path from the store.

    No permission check is performed here: the caller should have already
    verified proper permissions.
    """
    target_path = store.repository_namespace_path(namespace, repository)
    logger.debug('Recursively deleting path: %s' % target_path)
    store.remove(target_path)
def process_image_changes(namespace, repository, image_id):
    """Compute and store the file diffs and filesystem trie of ``image_id``.

    If a diffs file already exists for the image nothing is recomputed.
    Otherwise the last entry of the image's parent list is processed first
    (recursively), its trie is loaded as the starting filesystem state, and
    the entries of this image's layer tar are applied on top of it. The
    resulting trie and the sorted ``added`` / ``changed`` / ``removed`` lists
    are written to the store.

    Returns:
        The store path of the image's trie.
    """
    logger.debug('Generating diffs for image: %s' % image_id)

    diffs_path = store.image_file_diffs_path(namespace, repository, image_id)
    trie_path = store.image_file_trie_path(namespace, repository, image_id)

    if store.exists(diffs_path):
        logger.debug('Diffs already exist for image: %s' % image_id)
        return trie_path

    image = model.get_image_by_id(namespace, repository, image_id)
    ancestors = model.get_parent_images(image)

    # Compute the diffs and fs for the parent first if necessary
    if ancestors:
        parent_trie_path = process_image_changes(
            namespace, repository, ancestors[-1].docker_image_id)
    else:
        parent_trie_path = None

    # Read in the collapsed layer state of the filesystem for the parent
    parent_fs = changes.empty_fs()
    if parent_trie_path:
        parent_fs.frombytes(store.get_content(parent_trie_path))

    # Read in the file entries from the layer tar file. The diffs are computed
    # while the stream is still open, in case the entries are read lazily.
    layer_path = store.image_layer_path(namespace, repository, image_id)
    with store.stream_read_file(layer_path) as tar_stream:
        deleted_paths = set()
        tar_entries = changes.files_and_dirs_from_tar(tar_stream,
                                                      deleted_paths)
        new_metadata = changes.compute_new_diffs_and_fs(parent_fs,
                                                        tar_entries,
                                                        deleted_paths)
    new_trie, added, changed, removed = new_metadata

    # Write out the new trie
    store.put_content(trie_path, new_trie.tobytes())

    # Write out the diffs
    diffs = {}
    labelled = (('added', added), ('changed', changed), ('removed', removed))
    for label, entries in labelled:
        diffs[label] = sorted(entries)
    store.put_content(diffs_path, json.dumps(diffs, indent=2))

    return trie_path