From 820d5c047654df3b3798028cd6568ef37ba8fce5 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Tue, 16 Sep 2014 00:18:28 -0400 Subject: [PATCH 01/20] Add log sizes tool --- tools/logsize.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 tools/logsize.py diff --git a/tools/logsize.py b/tools/logsize.py new file mode 100644 index 000000000..ec7339553 --- /dev/null +++ b/tools/logsize.py @@ -0,0 +1,20 @@ +import logging +import json +import numpy + +from app import app, storage as store + +storage = store._storages['s3_us_east_1'] +storage._initialize_cloud_conn() + +sizes = [] +for key in storage._cloud_bucket.list('logarchive'): + sizes.append(key.size) + +array = numpy.array(sizes) +print 'Max: %s' % max(sizes) +print 'Min: %s' % min(sizes) +print 'Avg: %s' % (float(sum(sizes))/len(sizes)) +print 'Median: %s' % numpy.median(array) +print 'Std: %s' % numpy.std(array) +print 'Percent <= 64k: %s' % (float(len([entry for entry in sizes if entry <= 64000]))/len(sizes)) From e3c52fa0eb606bfc2e7b46ddf90906d4ce09c6d8 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Tue, 16 Sep 2014 00:18:57 -0400 Subject: [PATCH 02/20] Work in progress. This is currently broken! --- application.py | 2 + data/database.py | 1 + endpoints/registry.py | 1 + endpoints/verbs.py | 75 +++++++++++++++++++++ storage/distributedstorage.py | 1 + util/dockerimportformat.py | 123 ++++++++++++++++++++++++++++++++++ util/gzipwrap.py | 42 ++++++++++++ util/streamlayerformat.py | 99 +++++++++++++++++++++++++++ 8 files changed, 344 insertions(+) create mode 100644 endpoints/verbs.py create mode 100644 util/dockerimportformat.py create mode 100644 util/gzipwrap.py create mode 100644 util/streamlayerformat.py diff --git a/application.py b/application.py index 2fb79835b..b10aea363 100644 --- a/application.py +++ b/application.py @@ -17,6 +17,7 @@ from endpoints.index import index from endpoints.web import web from endpoints.tags import tags from endpoints.registry import registry +from endpoints.verbs import verbs from endpoints.webhooks import webhooks from endpoints.realtime import realtime from endpoints.callbacks import callback @@ -43,6 +44,7 @@ application.register_blueprint(callback, url_prefix='/oauth2') application.register_blueprint(index, url_prefix='/v1') application.register_blueprint(tags, url_prefix='/v1') application.register_blueprint(registry, url_prefix='/v1') +application.register_blueprint(verbs, url_prefix='/v1/repositories') application.register_blueprint(api_bp, url_prefix='/api') application.register_blueprint(webhooks, url_prefix='/webhooks') application.register_blueprint(realtime, url_prefix='/realtime') diff --git a/data/database.py b/data/database.py index 96e85a7d2..45e45b057 100644 --- a/data/database.py +++ b/data/database.py @@ -234,6 +234,7 @@ class ImageStorage(BaseModel): comment = TextField(null=True) command = TextField(null=True) image_size = BigIntegerField(null=True) + uncompressed_size = BigIntegerField(null=True) uploading = BooleanField(default=True, null=True) diff --git a/endpoints/registry.py b/endpoints/registry.py index 94719905a..4713ddd75 100644 --- a/endpoints/registry.py +++ b/endpoints/registry.py @@ -453,6 +453,7 @@ def put_image_json(namespace, repository, image_id): # We cleanup any old checksum in case it's a retry after a fail profile.debug('Cleanup old checksum') + repo_image.storage.uncompressed_size = data.get('Size') repo_image.storage.checksum = None repo_image.storage.save() diff --git a/endpoints/verbs.py b/endpoints/verbs.py new file 
mode 100644
index 000000000..d257898df
--- /dev/null
+++ b/endpoints/verbs.py
@@ -0,0 +1,75 @@
+import logging
+import json
+import hashlib
+
+from flask import (make_response, request, session, Response, redirect,
+                   Blueprint, abort, send_file, make_response)
+
+from app import storage as store, app
+from auth.auth import process_auth
+from auth.permissions import ReadRepositoryPermission
+from data import model
+from endpoints.registry import set_cache_headers
+
+from util.dockerimportformat import build_docker_import_stream
+
+from werkzeug.wsgi import wrap_file
+
+verbs = Blueprint('verbs', __name__)
+logger = logging.getLogger(__name__)
+
+
+@verbs.route('/<namespace>/<repository>/<tag>/squash', methods=['GET'])
+@process_auth
+@set_cache_headers
+def get_squashed_tag(namespace, repository, tag, headers):
+  permission = ReadRepositoryPermission(namespace, repository)
+  if permission.can() or model.repository_is_public(namespace, repository):
+    # Lookup the requested tag.
+    tag_image = model.get_tag_image(namespace, repository, tag)
+    if not tag_image:
+      abort(404)
+
+    # Lookup the tag's image and storage.
+    repo_image = model.get_repo_image(namespace, repository, tag_image.docker_image_id)
+    if not repo_image:
+      abort(404)
+
+    # Calculate a synthetic image ID by hashing the *image storage ID* with our
+    # secret. This is done to prevent the ID being guessable/overwritable by
+    # external pushes.
+    unhashed = str(repo_image.storage.id) + ':' + app.config['SECRET_KEY']
+    synthetic_image_id = hashlib.sha256(unhashed).hexdigest()
+
+    # Load the ancestry for the image.
+    uuid = repo_image.storage.uuid
+    ancestry_data = store.get_content(repo_image.storage.locations, store.image_ancestry_path(uuid))
+    full_image_list = json.loads(ancestry_data)
+
+    # Load the JSON for the image.
+ json_data = store.get_content(repo_image.storage.locations, store.image_json_path(uuid)) + layer_json = json.loads(json_data) + + def get_next_image(): + for current_image_id in full_image_list: + yield model.get_repo_image(namespace, repository, current_image_id) + + def get_next_layer(): + for current_image_id in full_image_list: + current_image_entry = model.get_repo_image(namespace, repository, current_image_id) + current_image_path = store.image_layer_path(current_image_entry.storage.uuid) + current_image_stream = store.stream_read_file(current_image_entry.storage.locations, + current_image_path) + + logger.debug('Returning image layer %s: %s' % (current_image_id, current_image_path)) + yield current_image_stream + + stream = build_docker_import_stream(namespace, repository, tag, synthetic_image_id, + layer_json, get_next_image, get_next_layer) + + return app.response_class(wrap_file(request.environ, stream, 1024 * 16), + mimetype='application/octet-stream', + direct_passthrough=True) + + + abort(403) diff --git a/storage/distributedstorage.py b/storage/distributedstorage.py index 1544d9725..d13362d55 100644 --- a/storage/distributedstorage.py +++ b/storage/distributedstorage.py @@ -18,6 +18,7 @@ def _location_aware(unbound_func): storage = self._storages[preferred] if not storage: + print locations storage = self._storages[random.sample(locations, 1)[0]] storage_func = getattr(storage, unbound_func.__name__) diff --git a/util/dockerimportformat.py b/util/dockerimportformat.py new file mode 100644 index 000000000..3ba159736 --- /dev/null +++ b/util/dockerimportformat.py @@ -0,0 +1,123 @@ +from util.gzipwrap import GzipWrap +from util.streamlayerformat import StreamLayerMerger + +import copy +import json +import tarfile + +from itertools import chain, islice +class some_magic_adaptor(object): + def __init__(self, src): + self.src = chain.from_iterable(src) + def read(self, n): + return "".join(islice(self.src, None, n)) + +def build_docker_import_stream(namespace, repository, tag, synthetic_image_id, + layer_json, get_image_iterator, get_layer_iterator): + """ Builds and streams a synthetic .tar.gz that represents a squashed version + of the given layers, in `docker import` V1 format. + """ + return some_magic_adaptor(_import_format_generator(namespace, repository, tag, + synthetic_image_id, layer_json, + get_image_iterator, get_layer_iterator)) + + +def _import_format_generator(namespace, repository, tag, synthetic_image_id, + layer_json, get_image_iterator, get_layer_iterator): + + # Docker import V1 Format (.tar): + # repositories - JSON file containing a repo -> tag -> image map + # {image ID folder}: + # json - The layer JSON + # layer.tar - The TARed contents of the layer + # VERSION - The docker import version: '1.0' + layer_merger = StreamLayerMerger(get_layer_iterator) + + # Yield the repositories file: + synthetic_layer_info = {} + synthetic_layer_info[tag + '.squash'] = synthetic_image_id + + repositories = {} + repositories[namespace + '/' + repository] = synthetic_layer_info + + yield _tar_file('repositories', json.dumps(repositories)) + + # Yield the image ID folder. + yield _tar_folder(synthetic_image_id) + + # Yield the JSON layer data. + layer_json = _build_layer_json(layer_json, synthetic_image_id) + yield _tar_file(synthetic_image_id + '/json', json.dumps(layer_json)) + + # Yield the VERSION file. + yield _tar_file(synthetic_image_id + '/VERSION', '1.0') + + # Yield the merged layer data's header. 
+ estimated_file_size = 0 + for image in get_image_iterator(): + estimated_file_size += image.storage.uncompressed_size or 0 + + yield _tar_file_header(synthetic_image_id + '/layer.tar', estimated_file_size) + + # Yield the contents of the merged layer. + yielded_size = 0 + for entry in layer_merger.get_generator(): + yield entry + yielded_size += len(entry) + + # If the yielded size is less than the estimated size (which is likely), fill the rest with + # zeros. + if yielded_size < estimated_file_size: + yield '\0' * (estimated_file_size - yielded_size) + + print estimated_file_size + print yielded_size + + # Yield any file padding to 512 bytes that is necessary. + yield _tar_file_padding(estimated_file_size) + + # Last two records are empty in TAR spec. + yield '\0' * 512 + yield '\0' * 512 + + +def _build_layer_json(layer_json, synthetic_image_id): + updated_json = copy.deepcopy(layer_json) + updated_json['id'] = synthetic_image_id + + if 'parent' in updated_json: + del updated_json['parent'] + + if 'config' in updated_json and 'Image' in updated_json['config']: + updated_json['config']['Image'] = synthetic_image_id + + if 'container_config' in updated_json and 'Image' in updated_json['container_config']: + updated_json['container_config']['Image'] = synthetic_image_id + + return updated_json + + +def _tar_file(name, contents): + length = len(contents) + tar_data = _tar_file_header(name, length) + tar_data += contents + tar_data += _tar_file_padding(length) + return tar_data + + +def _tar_file_padding(length): + if length % 512 != 0: + return '\0' * (512 - (length % 512)) + + +def _tar_file_header(name, file_size): + info = tarfile.TarInfo(name=name) + info.type = tarfile.REGTYPE + info.size = file_size + return info.tobuf() + + +def _tar_folder(name): + info = tarfile.TarInfo(name=name) + info.type = tarfile.DIRTYPE + return info.tobuf() diff --git a/util/gzipwrap.py b/util/gzipwrap.py new file mode 100644 index 000000000..02be6ae18 --- /dev/null +++ b/util/gzipwrap.py @@ -0,0 +1,42 @@ +from gzip import GzipFile + +class GzipWrap(object): + def __init__(self, input, filename=None, compresslevel=1): + self.input = iter(input) + self.buffer = '' + self.zipper = GzipFile(filename, mode='wb', fileobj=self, compresslevel=compresslevel) + + def read(self, size=-1): + # If the buffer already has enough bytes, then simply pop them off of + # the beginning and return them. + if len(self.buffer) >= size: + ret = self.buffer[0:size] + self.buffer = self.buffer[size:] + return ret + + # Otherwise, zip the input until we have enough bytes. + while True: + # Attempt to retrieve the next bytes to write. + is_done = False + try: + s = self.input.next() + self.zipper.write(s) + except StopIteration: + is_done = True + + if len(self.buffer) < size or is_done: + self.zipper.flush() + + if len(self.buffer) >= size or is_done: + ret = self.buffer[0:size] + self.buffer = self.buffer[size:] + return ret + + def flush(self): + pass + + def write(self, data): + self.buffer += data + + def close(self): + self.input.close() diff --git a/util/streamlayerformat.py b/util/streamlayerformat.py new file mode 100644 index 000000000..c197763f1 --- /dev/null +++ b/util/streamlayerformat.py @@ -0,0 +1,99 @@ +import marisa_trie +import os +import tarfile +import StringIO +import traceback + +AUFS_METADATA = u'.wh..wh.' + +AUFS_WHITEOUT = u'.wh.' +AUFS_WHITEOUT_PREFIX_LENGTH = len(AUFS_WHITEOUT) + +class StreamLayerMerger(object): + """ Class which creates a generator of the combined TAR data for a set of Docker layers. 
""" + def __init__(self, layer_iterator): + self.trie = marisa_trie.Trie() + self.layer_iterator = layer_iterator + self.encountered = [] + + def get_generator(self): + for current_layer in self.layer_iterator(): + # Read the current layer as TAR. If it is empty, we just continue + # to the next layer. + try: + tar_file = tarfile.open(mode='r|*', fileobj=current_layer) + except tarfile.ReadError as re: + continue + + # For each of the tar entries, yield them IF and ONLY IF we have not + # encountered the path before. + + # 9MB (+ padding below) so that it matches the 10MB expected by Gzip. + chunk_size = 1024 * 1024 * 9 + + for tar_info in tar_file: + result = self.process_tar_info(tar_info) + if not result: + continue + + (tarinfo, filebuf) = result + + yield tarinfo.tobuf() + + if filebuf: + length = 0 + file_stream = tar_file.extractfile(tarinfo) + while True: + current_block = file_stream.read(chunk_size) + if not len(current_block): + break + + yield current_block + length += len(current_block) + + file_stream.close() + + # Files must be padding to 512 byte multiples. + if length % 512 != 0: + yield '\0' * (512 - (length % 512)) + + # Close the layer stream now that we're done with it. + tar_file.close() + + # Update the trie with the new encountered entries. + self.trie = marisa_trie.Trie(self.encountered) + + # Last two records are empty in TAR spec. + yield '\0' * 512 + yield '\0' * 512 + + + def process_tar_info(self, tar_info): + absolute = os.path.relpath(tar_info.name.decode('utf-8'), './') + dirname = os.path.dirname(absolute) + filename = os.path.basename(absolute) + + # Skip directories and metadata + if (filename.startswith(AUFS_METADATA) or + absolute.startswith(AUFS_METADATA)): + # Skip + return None + + elif filename.startswith(AUFS_WHITEOUT): + removed_filename = filename[AUFS_WHITEOUT_PREFIX_LENGTH:] + removed_prefix = os.path.join('/', dirname, removed_filename) + self.encountered.append(removed_prefix) + return None + + # Check if this file has already been encountered somewhere. If so, + # skip it. 
+ if unicode(absolute) in self.trie: + return None + + self.encountered.append(absolute) + + if tar_info.isdir() or tar_info.issym() or tar_info.islnk(): + return (tar_info, False) + + elif tar_info.isfile(): + return (tar_info, True) From 9344839295896f5eca4dbc7a6f3246095b31e2cf Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Tue, 16 Sep 2014 11:53:54 -0400 Subject: [PATCH 03/20] Get squashed endpoint for `docker import` working --- application.py | 2 +- endpoints/verbs.py | 15 +---- util/dockerimportformat.py | 123 ------------------------------------- 3 files changed, 4 insertions(+), 136 deletions(-) delete mode 100644 util/dockerimportformat.py diff --git a/application.py b/application.py index b10aea363..c11d8e9dc 100644 --- a/application.py +++ b/application.py @@ -44,7 +44,7 @@ application.register_blueprint(callback, url_prefix='/oauth2') application.register_blueprint(index, url_prefix='/v1') application.register_blueprint(tags, url_prefix='/v1') application.register_blueprint(registry, url_prefix='/v1') -application.register_blueprint(verbs, url_prefix='/v1/repositories') +application.register_blueprint(verbs, url_prefix='/verbs/v1/repositories') application.register_blueprint(api_bp, url_prefix='/api') application.register_blueprint(webhooks, url_prefix='/webhooks') application.register_blueprint(realtime, url_prefix='/realtime') diff --git a/endpoints/verbs.py b/endpoints/verbs.py index d257898df..929f362dd 100644 --- a/endpoints/verbs.py +++ b/endpoints/verbs.py @@ -11,7 +11,8 @@ from auth.permissions import ReadRepositoryPermission from data import model from endpoints.registry import set_cache_headers -from util.dockerimportformat import build_docker_import_stream +from util.gzipwrap import GzipWrap +from util.streamlayerformat import StreamLayerMerger from werkzeug.wsgi import wrap_file @@ -46,14 +47,6 @@ def get_squashed_tag(namespace, repository, tag, headers): ancestry_data = store.get_content(repo_image.storage.locations, store.image_ancestry_path(uuid)) full_image_list = json.loads(ancestry_data) - # Load the JSON for the image. 
- json_data = store.get_content(repo_image.storage.locations, store.image_json_path(uuid)) - layer_json = json.loads(json_data) - - def get_next_image(): - for current_image_id in full_image_list: - yield model.get_repo_image(namespace, repository, current_image_id) - def get_next_layer(): for current_image_id in full_image_list: current_image_entry = model.get_repo_image(namespace, repository, current_image_id) @@ -64,9 +57,7 @@ def get_squashed_tag(namespace, repository, tag, headers): logger.debug('Returning image layer %s: %s' % (current_image_id, current_image_path)) yield current_image_stream - stream = build_docker_import_stream(namespace, repository, tag, synthetic_image_id, - layer_json, get_next_image, get_next_layer) - + stream = GzipWrap(StreamLayerMerger(get_next_layer).get_generator()) return app.response_class(wrap_file(request.environ, stream, 1024 * 16), mimetype='application/octet-stream', direct_passthrough=True) diff --git a/util/dockerimportformat.py b/util/dockerimportformat.py deleted file mode 100644 index 3ba159736..000000000 --- a/util/dockerimportformat.py +++ /dev/null @@ -1,123 +0,0 @@ -from util.gzipwrap import GzipWrap -from util.streamlayerformat import StreamLayerMerger - -import copy -import json -import tarfile - -from itertools import chain, islice -class some_magic_adaptor(object): - def __init__(self, src): - self.src = chain.from_iterable(src) - def read(self, n): - return "".join(islice(self.src, None, n)) - -def build_docker_import_stream(namespace, repository, tag, synthetic_image_id, - layer_json, get_image_iterator, get_layer_iterator): - """ Builds and streams a synthetic .tar.gz that represents a squashed version - of the given layers, in `docker import` V1 format. - """ - return some_magic_adaptor(_import_format_generator(namespace, repository, tag, - synthetic_image_id, layer_json, - get_image_iterator, get_layer_iterator)) - - -def _import_format_generator(namespace, repository, tag, synthetic_image_id, - layer_json, get_image_iterator, get_layer_iterator): - - # Docker import V1 Format (.tar): - # repositories - JSON file containing a repo -> tag -> image map - # {image ID folder}: - # json - The layer JSON - # layer.tar - The TARed contents of the layer - # VERSION - The docker import version: '1.0' - layer_merger = StreamLayerMerger(get_layer_iterator) - - # Yield the repositories file: - synthetic_layer_info = {} - synthetic_layer_info[tag + '.squash'] = synthetic_image_id - - repositories = {} - repositories[namespace + '/' + repository] = synthetic_layer_info - - yield _tar_file('repositories', json.dumps(repositories)) - - # Yield the image ID folder. - yield _tar_folder(synthetic_image_id) - - # Yield the JSON layer data. - layer_json = _build_layer_json(layer_json, synthetic_image_id) - yield _tar_file(synthetic_image_id + '/json', json.dumps(layer_json)) - - # Yield the VERSION file. - yield _tar_file(synthetic_image_id + '/VERSION', '1.0') - - # Yield the merged layer data's header. - estimated_file_size = 0 - for image in get_image_iterator(): - estimated_file_size += image.storage.uncompressed_size or 0 - - yield _tar_file_header(synthetic_image_id + '/layer.tar', estimated_file_size) - - # Yield the contents of the merged layer. - yielded_size = 0 - for entry in layer_merger.get_generator(): - yield entry - yielded_size += len(entry) - - # If the yielded size is less than the estimated size (which is likely), fill the rest with - # zeros. 
- if yielded_size < estimated_file_size: - yield '\0' * (estimated_file_size - yielded_size) - - print estimated_file_size - print yielded_size - - # Yield any file padding to 512 bytes that is necessary. - yield _tar_file_padding(estimated_file_size) - - # Last two records are empty in TAR spec. - yield '\0' * 512 - yield '\0' * 512 - - -def _build_layer_json(layer_json, synthetic_image_id): - updated_json = copy.deepcopy(layer_json) - updated_json['id'] = synthetic_image_id - - if 'parent' in updated_json: - del updated_json['parent'] - - if 'config' in updated_json and 'Image' in updated_json['config']: - updated_json['config']['Image'] = synthetic_image_id - - if 'container_config' in updated_json and 'Image' in updated_json['container_config']: - updated_json['container_config']['Image'] = synthetic_image_id - - return updated_json - - -def _tar_file(name, contents): - length = len(contents) - tar_data = _tar_file_header(name, length) - tar_data += contents - tar_data += _tar_file_padding(length) - return tar_data - - -def _tar_file_padding(length): - if length % 512 != 0: - return '\0' * (512 - (length % 512)) - - -def _tar_file_header(name, file_size): - info = tarfile.TarInfo(name=name) - info.type = tarfile.REGTYPE - info.size = file_size - return info.tobuf() - - -def _tar_folder(name): - info = tarfile.TarInfo(name=name) - info.type = tarfile.DIRTYPE - return info.tobuf() From 5cca609c550a43828007df4020e8e85124c14763 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Tue, 16 Sep 2014 14:20:42 -0400 Subject: [PATCH 04/20] Switch back to send_file and add a bit of gzip buffering --- endpoints/verbs.py | 5 +---- util/gzipwrap.py | 20 ++++++++++++++------ 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/endpoints/verbs.py b/endpoints/verbs.py index 929f362dd..159f4777f 100644 --- a/endpoints/verbs.py +++ b/endpoints/verbs.py @@ -58,9 +58,6 @@ def get_squashed_tag(namespace, repository, tag, headers): yield current_image_stream stream = GzipWrap(StreamLayerMerger(get_next_layer).get_generator()) - return app.response_class(wrap_file(request.environ, stream, 1024 * 16), - mimetype='application/octet-stream', - direct_passthrough=True) - + return send_file(stream) abort(403) diff --git a/util/gzipwrap.py b/util/gzipwrap.py index 02be6ae18..037196c5e 100644 --- a/util/gzipwrap.py +++ b/util/gzipwrap.py @@ -18,13 +18,21 @@ class GzipWrap(object): while True: # Attempt to retrieve the next bytes to write. is_done = False - try: - s = self.input.next() - self.zipper.write(s) - except StopIteration: - is_done = True - if len(self.buffer) < size or is_done: + input_size = 0 + input_buffer = '' + while input_size < 1024 * 256: # 256K buffer to Gzip + try: + s = self.input.next() + input_buffer += s + input_size = input_size + len(s) + except StopIteration: + is_done = True + break + + self.zipper.write(input_buffer) + + if is_done: self.zipper.flush() if len(self.buffer) >= size or is_done: From 1cfb6fc35370a96ff25dd39b62b4184859de3df4 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Tue, 16 Sep 2014 22:43:19 -0400 Subject: [PATCH 05/20] Have the squashing system write the data (via a double queue system and multiprocessing) to both the client and the stack's storage. 
On subsequent calls, if the synthetic image exists, it will be returned directly instead of being recomputed --- endpoints/verbs.py | 63 ++++++++++++++++++++++++++++++++++++-------- util/queuefile.py | 37 ++++++++++++++++++++++++++ util/queueprocess.py | 57 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 146 insertions(+), 11 deletions(-) create mode 100644 util/queuefile.py create mode 100644 util/queueprocess.py diff --git a/endpoints/verbs.py b/endpoints/verbs.py index 159f4777f..5f4ffcfd8 100644 --- a/endpoints/verbs.py +++ b/endpoints/verbs.py @@ -11,6 +11,8 @@ from auth.permissions import ReadRepositoryPermission from data import model from endpoints.registry import set_cache_headers +from util.queuefile import QueueFile +from util.queueprocess import QueueProcess from util.gzipwrap import GzipWrap from util.streamlayerformat import StreamLayerMerger @@ -19,6 +21,24 @@ from werkzeug.wsgi import wrap_file verbs = Blueprint('verbs', __name__) logger = logging.getLogger(__name__) +def _open_stream(namespace, repository, image_list): + def get_next_layer(): + for current_image_id in image_list: + current_image_entry = model.get_repo_image(namespace, repository, current_image_id) + current_image_path = store.image_layer_path(current_image_entry.storage.uuid) + current_image_stream = store.stream_read_file(current_image_entry.storage.locations, + current_image_path) + + logger.debug('Returning image layer %s: %s' % (current_image_id, current_image_path)) + yield current_image_stream + + stream = GzipWrap(StreamLayerMerger(get_next_layer).get_generator()) + return stream.read + +def _write_image_to_storage(namespace, repository, locations, image_id, queue_file): + image_path = store.image_layer_path(image_id) + store.stream_write(locations, image_path, queue_file) + queue_file.close() @verbs.route('////squash', methods=['GET']) @process_auth @@ -42,22 +62,43 @@ def get_squashed_tag(namespace, repository, tag, headers): unhashed = str(repo_image.storage.id) + ':' + app.config['SECRET_KEY'] synthetic_image_id = hashlib.sha256(unhashed).hexdigest() + # Check to see if the synthetic image ID exists in storage. If so, we just return a 302. + logger.debug('Looking up synthetic image %s', synthetic_image_id) + + locations = repo_image.storage.locations + saved_image_path = store.image_layer_path(synthetic_image_id) + if store.exists(locations, saved_image_path): + logger.debug('Synthetic image %s exists in storage', synthetic_image_id) + download_url = store.get_direct_download_url(locations, saved_image_path) + if download_url: + logger.debug('Redirecting to download URL for synthetic image %s', synthetic_image_id) + return redirect(download_url, code=302) + + logger.debug('Sending cached synthetic image %s', synthetic_image_id) + return send_file(store.stream_read_file(locations, saved_image_path)) + # Load the ancestry for the image. 
+ logger.debug('Building and returning synthetic image %s', synthetic_image_id) uuid = repo_image.storage.uuid ancestry_data = store.get_content(repo_image.storage.locations, store.image_ancestry_path(uuid)) full_image_list = json.loads(ancestry_data) - - def get_next_layer(): - for current_image_id in full_image_list: - current_image_entry = model.get_repo_image(namespace, repository, current_image_id) - current_image_path = store.image_layer_path(current_image_entry.storage.uuid) - current_image_stream = store.stream_read_file(current_image_entry.storage.locations, - current_image_path) - logger.debug('Returning image layer %s: %s' % (current_image_id, current_image_path)) - yield current_image_stream + # Create a queue process to generate the data. The queue files will read from the process + # and send the results to the client and storage. + args = (namespace, repository, full_image_list) + queue_process = QueueProcess(_open_stream, 8 * 1024, 10 * 1024 * 1024, args) # 8K/10M chunk/max - stream = GzipWrap(StreamLayerMerger(get_next_layer).get_generator()) - return send_file(stream) + client_queue_file = QueueFile(queue_process.create_queue(), 'client') + storage_queue_file = QueueFile(queue_process.create_queue(), 'storage') + + # Start building. + queue_process.run() + + # Start the storage saving. + storage_args = (namespace, repository, locations, synthetic_image_id, storage_queue_file) + QueueProcess.run_process(_write_image_to_storage, storage_args) + + # Return the client's data. + return send_file(client_queue_file) abort(403) diff --git a/util/queuefile.py b/util/queuefile.py new file mode 100644 index 000000000..9c64c26fb --- /dev/null +++ b/util/queuefile.py @@ -0,0 +1,37 @@ +from multiprocessing import Queue +import os + +class QueueFile(object): + """ Class which implements a file-like interface and reads from a blocking + multiprocessing queue. + """ + def __init__(self, queue, name=None): + self._queue = queue + self._closed = False + self._done = False + self._buffer = '' + self._total_size = 0 + self._name = name + + def read(self, size=8192): + if self._closed or self._done: + return None + + while len(self._buffer) < size: + result = self._queue.get(block=True) + if result is None: + self._done = True + break + + self._buffer += result + self._total_size += len(result) + + buf = self._buffer[0:size] + self._buffer = self._buffer[size:] + return buf + + def flush(self): + pass + + def close(self): + self._closed = True diff --git a/util/queueprocess.py b/util/queueprocess.py new file mode 100644 index 000000000..bf8ecb280 --- /dev/null +++ b/util/queueprocess.py @@ -0,0 +1,57 @@ +from multiprocessing import Process, Queue +import logging +import multiprocessing +import os +import time +import gipc + +logger = multiprocessing.log_to_stderr() +logger.setLevel(logging.INFO) + +class QueueProcess(object): + """ Helper class which invokes a worker in a process to produce + data for one (or more) queues. + """ + def __init__(self, get_producer, chunk_size, max_size, args): + self._get_producer = get_producer + self._queues = [] + self._chunk_size = chunk_size + self._max_size = max_size + self._args = args or [] + + def create_queue(self): + """ Adds a multiprocessing queue to the list of queues. Any queues added + will have the data produced appended. 
+ """ + queue = Queue(self._max_size / self._chunk_size) + self._queues.append(queue) + return queue + + @staticmethod + def run_process(target, args): + gipc.start_process(target=target, args=args) + + def run(self): + # Important! gipc is used here because normal multiprocessing does not work + # correctly with gevent when we sleep. + args = (self._get_producer, self._queues, self._chunk_size, self._args) + QueueProcess.run_process(_run, args) + +def _run(get_producer, queues, chunk_size, args): + producer = get_producer(*args) + while True: + data = producer(chunk_size) or None + for queue in queues: + try: + queue.put(data, block=True, timeout=10) + except Exception as ex: + # One of the listeners stopped listening. + return + + if data is None: + break + + # Important! This allows the thread that writes the queue data to the pipe + # to do so. Otherwise, this hangs. + time.sleep(0) + From efc06b54f64916fee98e6a1deacaf2b7ed34db6d Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Tue, 16 Sep 2014 22:44:45 -0400 Subject: [PATCH 06/20] Add a TODO and some slightly better naming --- endpoints/verbs.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/endpoints/verbs.py b/endpoints/verbs.py index 5f4ffcfd8..1985f47d4 100644 --- a/endpoints/verbs.py +++ b/endpoints/verbs.py @@ -35,8 +35,10 @@ def _open_stream(namespace, repository, image_list): stream = GzipWrap(StreamLayerMerger(get_next_layer).get_generator()) return stream.read -def _write_image_to_storage(namespace, repository, locations, image_id, queue_file): - image_path = store.image_layer_path(image_id) +def _write_synthetic_image_to_storage(namespace, repository, locations, + synthetic_image_id, queue_file): + # TODO: make sure this synthetic image expires! + image_path = store.image_layer_path(synthetic_image_id) store.stream_write(locations, image_path, queue_file) queue_file.close() @@ -96,7 +98,7 @@ def get_squashed_tag(namespace, repository, tag, headers): # Start the storage saving. storage_args = (namespace, repository, locations, synthetic_image_id, storage_queue_file) - QueueProcess.run_process(_write_image_to_storage, storage_args) + QueueProcess.run_process(_write_synthetic_image_to_storage, storage_args) # Return the client's data. return send_file(client_queue_file) From 62f1f5f583eb5210df6e52d0218f8e145eed828d Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Wed, 17 Sep 2014 11:50:52 -0400 Subject: [PATCH 07/20] Add basic layer merging tests --- test/test_streamlayerformat.py | 143 +++++++++++++++++++++++++++++++++ 1 file changed, 143 insertions(+) create mode 100644 test/test_streamlayerformat.py diff --git a/test/test_streamlayerformat.py b/test/test_streamlayerformat.py new file mode 100644 index 000000000..056afa4ce --- /dev/null +++ b/test/test_streamlayerformat.py @@ -0,0 +1,143 @@ +import unittest +import tarfile + +from StringIO import StringIO +from util.streamlayerformat import StreamLayerMerger, AUFS_WHITEOUT + +class TestStreamLayerMerger(unittest.TestCase): + def create_layer(self, **kwargs): + output = StringIO() + with tarfile.open(fileobj=output, mode='w:gz') as tar: + for filename in kwargs: + current_filename = filename + current_contents = kwargs[filename] + + if current_contents is None: + # This is a deleted file. 
+ current_filename = AUFS_WHITEOUT + current_filename + current_contents = '' + + info = tarfile.TarInfo(name=current_filename) + info.size = len(current_contents) + tar.addfile(info, fileobj=StringIO(current_contents)) + + return output.getvalue() + + def squash_layers(self, layers): + def get_layers(): + return [StringIO(layer) for layer in layers] + + merger = StreamLayerMerger(get_layers) + merged_data = ''.join(merger.get_generator()) + return merged_data + + def assertHasFile(self, squashed, filename, contents): + with tarfile.open(fileobj=StringIO(squashed), mode='r:*') as tar: + member = tar.getmember(filename) + self.assertEquals(contents, '\n'.join(tar.extractfile(member).readlines())) + + def assertDoesNotHaveFile(self, squashed, filename): + with tarfile.open(fileobj=StringIO(squashed), mode='r:*') as tar: + try: + member = tar.getmember(filename) + self.fail('Filename %s found' % filename) + except: + pass + + def test_single_layer(self): + tar_layer = self.create_layer( + some_file = 'foo', + another_file = 'bar', + third_file = 'meh') + + squashed = self.squash_layers([tar_layer]) + + self.assertHasFile(squashed, 'some_file', 'foo') + self.assertHasFile(squashed, 'another_file', 'bar') + self.assertHasFile(squashed, 'third_file', 'meh') + + def test_multiple_layers(self): + second_layer = self.create_layer( + some_file = 'foo', + another_file = 'bar', + third_file = 'meh') + + first_layer = self.create_layer( + top_file = 'top') + + squashed = self.squash_layers([first_layer, second_layer]) + + self.assertHasFile(squashed, 'some_file', 'foo') + self.assertHasFile(squashed, 'another_file', 'bar') + self.assertHasFile(squashed, 'third_file', 'meh') + self.assertHasFile(squashed, 'top_file', 'top') + + def test_multiple_layers_overwrite(self): + second_layer = self.create_layer( + some_file = 'foo', + another_file = 'bar', + third_file = 'meh') + + first_layer = self.create_layer( + another_file = 'top') + + squashed = self.squash_layers([first_layer, second_layer]) + + self.assertHasFile(squashed, 'some_file', 'foo') + self.assertHasFile(squashed, 'third_file', 'meh') + self.assertHasFile(squashed, 'another_file', 'top') + + def test_deleted_file(self): + second_layer = self.create_layer( + some_file = 'foo', + another_file = 'bar', + third_file = 'meh') + + first_layer = self.create_layer( + another_file = None) + + squashed = self.squash_layers([first_layer, second_layer]) + + self.assertHasFile(squashed, 'some_file', 'foo') + self.assertHasFile(squashed, 'third_file', 'meh') + self.assertDoesNotHaveFile(squashed, 'another_file') + + def test_deleted_readded_file(self): + third_layer = self.create_layer( + another_file = 'bar') + + second_layer = self.create_layer( + some_file = 'foo', + another_file = None, + third_file = 'meh') + + first_layer = self.create_layer( + another_file = 'newagain') + + squashed = self.squash_layers([first_layer, second_layer, third_layer]) + + self.assertHasFile(squashed, 'some_file', 'foo') + self.assertHasFile(squashed, 'third_file', 'meh') + self.assertHasFile(squashed, 'another_file', 'newagain') + + def test_deleted_in_lower_layer(self): + third_layer = self.create_layer( + another_file = 'bar') + + second_layer = self.create_layer( + some_file = 'foo', + another_file = None, + third_file = 'meh') + + first_layer = self.create_layer( + top_file = 'top') + + squashed = self.squash_layers([first_layer, second_layer, third_layer]) + + self.assertHasFile(squashed, 'some_file', 'foo') + self.assertHasFile(squashed, 'third_file', 'meh') + 
self.assertHasFile(squashed, 'top_file', 'top') + self.assertDoesNotHaveFile(squashed, 'another_file') + +if __name__ == '__main__': + unittest.main() From 43555af63d6bf89eeffe019f893c82c7c99a60f2 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Wed, 17 Sep 2014 17:49:46 -0400 Subject: [PATCH 08/20] Address review comments --- storage/distributedstorage.py | 1 - tools/logsize.py | 20 -------------------- 2 files changed, 21 deletions(-) delete mode 100644 tools/logsize.py diff --git a/storage/distributedstorage.py b/storage/distributedstorage.py index d13362d55..1544d9725 100644 --- a/storage/distributedstorage.py +++ b/storage/distributedstorage.py @@ -18,7 +18,6 @@ def _location_aware(unbound_func): storage = self._storages[preferred] if not storage: - print locations storage = self._storages[random.sample(locations, 1)[0]] storage_func = getattr(storage, unbound_func.__name__) diff --git a/tools/logsize.py b/tools/logsize.py deleted file mode 100644 index ec7339553..000000000 --- a/tools/logsize.py +++ /dev/null @@ -1,20 +0,0 @@ -import logging -import json -import numpy - -from app import app, storage as store - -storage = store._storages['s3_us_east_1'] -storage._initialize_cloud_conn() - -sizes = [] -for key in storage._cloud_bucket.list('logarchive'): - sizes.append(key.size) - -array = numpy.array(sizes) -print 'Max: %s' % max(sizes) -print 'Min: %s' % min(sizes) -print 'Avg: %s' % (float(sum(sizes))/len(sizes)) -print 'Median: %s' % numpy.median(array) -print 'Std: %s' % numpy.std(array) -print 'Percent <= 64k: %s' % (float(len([entry for entry in sizes if entry <= 64000]))/len(sizes)) From 05bb710830b9cabe7e1dcba2a002d20028486d1a Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Thu, 18 Sep 2014 15:56:59 -0400 Subject: [PATCH 09/20] - Add a shared AUFS utility lib and change both changes and streamlayerformat to use it - Add UI for selecting whether to pull the tag, the repo, or the squashed tag --- static/css/quay.css | 101 +++++++++++++++++++++----------- static/directives/copy-box.html | 9 ++- static/js/app.js | 13 +++- static/js/controllers.js | 48 +++++++++++++++ static/partials/view-repo.html | 19 ++++-- util/aufs.py | 31 ++++++++++ util/changes.py | 27 +++------ util/streamlayerformat.py | 28 ++++----- 8 files changed, 197 insertions(+), 79 deletions(-) create mode 100644 util/aufs.py diff --git a/static/css/quay.css b/static/css/quay.css index c2652c8c1..4b559a104 100644 --- a/static/css/quay.css +++ b/static/css/quay.css @@ -2206,37 +2206,55 @@ p.editable:hover i { font-size: 0.8em; position: relative; margin-top: 30px; - margin-right: 26px; } .repo .pull-container { display: inline-block; - width: 300px; + width: 460px; margin-left: 10px; margin-right: 10px; vertical-align: middle; + position: relative; } -.repo .pull-container input { - cursor: default; - background: white; - color: #666; - padding: 4px; - border: 1px solid #ddd; - width: 300px; -} - -.repo-image-view .id-container { +.repo .pull-container .pull-selector { display: inline-block; - margin-top: 10px; + width: 114px; + font-size: 14px; + height: 36px; + vertical-align: top; + border: 1px solid #ddd; + margin-right: -3px; + background: #f8f8f8; + outline: none; } -.repo-image-view .id-container input { - background: #fefefe; +.repo .pull-container .pull-selector i { + display: inline-block; + margin-right: 6px; } -.repo-image-view .id-container .input-group { - width: 542px; + +.repo .pull-container .copy-box { + width: 340px; + display: inline-block; +} + +.repo .pull-container .copy-box 
.copy-container { + border-top-left-radius: 0px !important; + border-bottom-left-radius: 0px !important; + border-left: 0px; +} + +.repo .pull-container .dropdown-menu li i.fa { + text-align: center; + width: 12px; + display: inline-block; +} + +.repo .pull-container sup { + margin-left: 4px; + color: red; } .repo-image-view #clipboardCopied { @@ -2272,25 +2290,45 @@ p.editable:hover i { position: relative; } -.copy-box-element.disabled .input-group-addon { - display: none; +.copy-box-element .copy-container { + border-radius: 4px !important; + border: 1px solid #ddd; + position: relative; +} + +.copy-box-element input { + border: 0px; + padding-right: 32px; +} + +.copy-box-element .copy-container .copy-icon { + position: absolute; + top: 8px; + right: 10px; + display: inline-block; + color: #ddd; + font-size: 16px; + cursor: pointer; + transition: color 0.5s ease-in-out; +} + +.copy-box-element .copy-container .copy-icon.zeroclipboard-is-hover { + color: #444; } .copy-box-element.disabled input { - border-radius: 4px !important; + margin-right: 0px; +} + +.copy-box-element.disabled .copy-icon { + display: none; } .global-zeroclipboard-container embed { cursor: pointer; } -#copyClipboard.zeroclipboard-is-hover, .copy-box-element .zeroclipboard-is-hover { - background: #428bca; - color: white; - cursor: pointer !important; -} - -#clipboardCopied.hovering, .copy-box-element .hovering { +.copy-box-element .hovering { position: absolute; right: 0px; top: 40px; @@ -2298,16 +2336,11 @@ p.editable:hover i { z-index: 100; } -.copy-box-element .id-container { - display: inline-block; - vertical-align: middle; -} - .copy-box-element input { background-color: white !important; } -#clipboardCopied, .clipboard-copied-message { +.clipboard-copied-message { font-size: 0.8em; display: inline-block; margin-right: 10px; @@ -2318,7 +2351,7 @@ p.editable:hover i { border-radius: 4px; } -#clipboardCopied.animated, .clipboard-copied-message { +.clipboard-copied-message { -webkit-animation: fadeOut 4s ease-in-out 0s 1 forwards; -moz-animation: fadeOut 4s ease-in-out 0s 1 forwards; -ms-animation: fadeOut 4s ease-in-out 0s 1 forwards; diff --git a/static/directives/copy-box.html b/static/directives/copy-box.html index 07dea7407..7532a6d68 100644 --- a/static/directives/copy-box.html +++ b/static/directives/copy-box.html @@ -1,9 +1,12 @@
-
+
- - + +
diff --git a/static/js/app.js b/static/js/app.js index 9ebe2a3e1..1adf4d0b3 100644 --- a/static/js/app.js +++ b/static/js/app.js @@ -812,6 +812,15 @@ quayApp = angular.module('quay', quayDependencies, function($provide, cfpLoading return config['SERVER_HOSTNAME']; }; + config.getHost = function(opt_auth) { + var auth = opt_auth; + if (auth) { + auth = auth + '@'; + } + + return config['PREFERRED_URL_SCHEME'] + '://' + auth + config['SERVER_HOSTNAME']; + }; + config.getUrl = function(opt_path) { var path = opt_path || ''; return config['PREFERRED_URL_SCHEME'] + '://' + config['SERVER_HOSTNAME'] + path; @@ -2455,7 +2464,7 @@ quayApp.directive('copyBox', function () { restrict: 'C', scope: { 'value': '=value', - 'hoveringMessage': '=hoveringMessage' + 'hoveringMessage': '=hoveringMessage', }, controller: function($scope, $element, $rootScope) { $scope.disabled = false; @@ -2464,7 +2473,7 @@ quayApp.directive('copyBox', function () { $rootScope.__copyBoxIdCounter = number + 1; $scope.inputId = "copy-box-input-" + number; - var button = $($element).find('.input-group-addon'); + var button = $($element).find('.copy-icon'); var input = $($element).find('input'); input.attr('id', $scope.inputId); diff --git a/static/js/controllers.js b/static/js/controllers.js index 7010dc4eb..f781f4dac 100644 --- a/static/js/controllers.js +++ b/static/js/controllers.js @@ -361,6 +361,9 @@ function RepoCtrl($scope, $sanitize, Restangular, ImageMetadataService, ApiServi var namespace = $routeParams.namespace; var name = $routeParams.name; + $scope.pullCommands = []; + $scope.currentPullCommand = null; + $rootScope.title = 'Loading...'; // Watch for the destruction of the scope. @@ -395,6 +398,47 @@ function RepoCtrl($scope, $sanitize, Restangular, ImageMetadataService, ApiServi $scope.buildDialogShowCounter = 0; $scope.getFormattedCommand = ImageMetadataService.getFormattedCommand; + $scope.setCurrentPullCommand = function(pullCommand) { + $scope.currentPullCommand = pullCommand; + }; + + $scope.updatePullCommand = function() { + $scope.pullCommands = []; + + if ($scope.currentTag) { + $scope.pullCommands.push({ + 'title': 'docker pull (Tag ' + $scope.currentTag.name + ')', + 'shortTitle': 'Pull Tag', + 'icon': 'fa-tag', + 'command': 'docker pull ' + Config.getDomain() + '/' + namespace + '/' + name + ':' + $scope.currentTag.name + }); + } + + $scope.pullCommands.push({ + 'title': 'docker pull (Full Repository)', + 'shortTitle': 'Pull Repo', + 'icon': 'fa-code-fork', + 'command': 'docker pull ' + Config.getDomain() + '/' + namespace + '/' + name + }); + + if ($scope.currentTag) { + var squash = 'docker import ' + Config.getHost('ACCOUNTNAME:PASSWORDORTOKEN'); + squash += '/verbs/v1/' + namespace + '/' + name + '/' + $scope.currentTag.name + '/squash'; + squash += ' '; + squash += Config.getDomain() + '/' + namespace + '/' + name + '/' + $scope.currentTag.name + '.squash'; + + $scope.pullCommands.push({ + 'title': 'Squashed image (Tag ' + $scope.currentTag.name + ')', + 'shortTitle': 'Squashed', + 'icon': 'fa-file-archive-o', + 'command': squash, + 'experimental': true + }); + } + + $scope.currentPullCommand = $scope.pullCommands[0]; + }; + $scope.showNewBuildDialog = function() { $scope.buildDialogShowCounter++; }; @@ -587,6 +631,8 @@ function RepoCtrl($scope, $sanitize, Restangular, ImageMetadataService, ApiServi $location.search('tag', null); $location.search('image', imageId.substr(0, 12)); } + + $scope.updatePullCommand(); }; $scope.setTag = function(tagName, opt_updateURL) { @@ -621,6 +667,8 @@ function 
RepoCtrl($scope, $sanitize, Restangular, ImageMetadataService, ApiServi $scope.currentTag = null; $scope.currentImage = null; } + + $scope.updatePullCommand(); }; $scope.getFirstTextLine = getFirstTextLine; diff --git a/static/partials/view-repo.html b/static/partials/view-repo.html index e5f2cecc6..68be28679 100644 --- a/static/partials/view-repo.html +++ b/static/partials/view-repo.html @@ -56,10 +56,21 @@ -
-
-
-
+
+ + +
diff --git a/util/aufs.py b/util/aufs.py new file mode 100644 index 000000000..e1ffb5b4a --- /dev/null +++ b/util/aufs.py @@ -0,0 +1,31 @@ +import os + +AUFS_METADATA = u'.wh..wh.' +AUFS_WHITEOUT = u'.wh.' +AUFS_WHITEOUT_PREFIX_LENGTH = len(AUFS_WHITEOUT) + +def is_aufs_metadata(filepath): + """ Returns whether the given filepath references an AUFS metadata file. """ + filename = os.path.basename(filepath) + return filename.startswith(AUFS_METADATA) or filepath.startswith(AUFS_METADATA) + +def get_deleted_filename(filepath): + """ Returns the name of the deleted file referenced by the AUFS whiteout file at + the given path or None if the file path does not reference a whiteout file. + """ + filename = os.path.basename(filepath) + if not filename.startswith(AUFS_WHITEOUT): + return None + + return filename[AUFS_WHITEOUT_PREFIX_LENGTH:] + +def get_deleted_prefix(filepath): + """ Returns the path prefix of the deleted file referenced by the AUFS whiteout file at + the given path or None if the file path does not reference a whiteout file. + """ + deleted_filename = get_deleted_filename(filepath) + if deleted_filename is None: + return None + + dirname = os.path.dirname(filepath) + return os.path.join('/', dirname, deleted_filename) diff --git a/util/changes.py b/util/changes.py index eaeec9d83..a6d20041f 100644 --- a/util/changes.py +++ b/util/changes.py @@ -1,16 +1,10 @@ import marisa_trie import os import tarfile - - -AUFS_METADATA = u'.wh..wh.' - -AUFS_WHITEOUT = u'.wh.' -AUFS_WHITEOUT_PREFIX_LENGTH = len(AUFS_WHITEOUT) +from aufs import is_aufs_metadata, get_deleted_prefix ALLOWED_TYPES = {tarfile.REGTYPE, tarfile.AREGTYPE} - def files_and_dirs_from_tar(source_stream, removed_prefix_collector): try: tar_stream = tarfile.open(mode='r|*', fileobj=source_stream) @@ -20,22 +14,19 @@ def files_and_dirs_from_tar(source_stream, removed_prefix_collector): for tar_info in tar_stream: absolute = os.path.relpath(tar_info.name.decode('utf-8'), './') - dirname = os.path.dirname(absolute) - filename = os.path.basename(absolute) - # Skip directories and metadata - if (filename.startswith(AUFS_METADATA) or - absolute.startswith(AUFS_METADATA)): - # Skip + # Skip metadata. + if is_aufs_metadata(absolute): continue - elif filename.startswith(AUFS_WHITEOUT): - removed_filename = filename[AUFS_WHITEOUT_PREFIX_LENGTH:] - removed_prefix = os.path.join('/', dirname, removed_filename) - removed_prefix_collector.add(removed_prefix) + # Add prefixes of removed paths to the collector. + deleted_prefix = get_deleted_prefix(absolute) + if deleted_prefix is not None: + deleted_prefix.add(deleted_prefix) continue - elif tar_info.type in ALLOWED_TYPES: + # Otherwise, yield the path if it is in the allowed types. + if tar_info.type in ALLOWED_TYPES: yield '/' + absolute diff --git a/util/streamlayerformat.py b/util/streamlayerformat.py index c197763f1..757d1b4ef 100644 --- a/util/streamlayerformat.py +++ b/util/streamlayerformat.py @@ -1,8 +1,8 @@ import marisa_trie import os import tarfile -import StringIO -import traceback +from aufs import is_aufs_metadata, get_deleted_prefix + AUFS_METADATA = u'.wh..wh.' @@ -70,19 +70,15 @@ class StreamLayerMerger(object): def process_tar_info(self, tar_info): absolute = os.path.relpath(tar_info.name.decode('utf-8'), './') - dirname = os.path.dirname(absolute) - filename = os.path.basename(absolute) - # Skip directories and metadata - if (filename.startswith(AUFS_METADATA) or - absolute.startswith(AUFS_METADATA)): - # Skip + # Skip metadata. 
+ if is_aufs_metadata(absolute): return None - elif filename.startswith(AUFS_WHITEOUT): - removed_filename = filename[AUFS_WHITEOUT_PREFIX_LENGTH:] - removed_prefix = os.path.join('/', dirname, removed_filename) - self.encountered.append(removed_prefix) + # Add any prefix of deleted paths to the prefix list. + deleted_prefix = get_deleted_prefix(absolute) + if deleted_prefix is not None: + self.encountered.append(deleted_prefix) return None # Check if this file has already been encountered somewhere. If so, @@ -90,10 +86,6 @@ class StreamLayerMerger(object): if unicode(absolute) in self.trie: return None + # Otherwise, add the path to the encountered list and return it. self.encountered.append(absolute) - - if tar_info.isdir() or tar_info.issym() or tar_info.islnk(): - return (tar_info, False) - - elif tar_info.isfile(): - return (tar_info, True) + return (tar_info, tar_info.isfile() or tar_info.isdev()) From 11bb8e6448793460a7c8443eaea9710ce2a37f0a Mon Sep 17 00:00:00 2001 From: Jake Moshenko Date: Thu, 18 Sep 2014 17:26:40 -0400 Subject: [PATCH 10/20] Actually store the generated image storage in the database, and allow it to be garbage collected when the parent image storage is collected. --- data/database.py | 20 +++++++++- data/model/legacy.py | 84 +++++++++++++++++++++++++++++++++++------- endpoints/verbs.py | 63 +++++++++++++++---------------- initdb.py | 2 + requirements-nover.txt | 1 + 5 files changed, 120 insertions(+), 50 deletions(-) diff --git a/data/database.py b/data/database.py index 45e45b057..fb060a041 100644 --- a/data/database.py +++ b/data/database.py @@ -192,7 +192,6 @@ class PermissionPrototype(BaseModel): ) - class AccessToken(BaseModel): friendly_name = CharField(null=True) code = CharField(default=random_string_generator(length=64), unique=True, @@ -238,6 +237,23 @@ class ImageStorage(BaseModel): uploading = BooleanField(default=True, null=True) +class ImageStorageTransformation(BaseModel): + name = CharField(index=True, unique=True) + + +class DerivedImageStorage(BaseModel): + source = ForeignKeyField(ImageStorage, null=True, related_name='source') + derivative = ForeignKeyField(ImageStorage, related_name='derivative') + transformation = ForeignKeyField(ImageStorageTransformation) + + class Meta: + database = db + read_slaves = (read_slave,) + indexes = ( + (('source', 'transformation'), True), + ) + + class ImageStorageLocation(BaseModel): name = CharField(unique=True, index=True) @@ -422,4 +438,4 @@ all_models = [User, Repository, Image, AccessToken, Role, RepositoryPermission, OAuthApplication, OAuthAuthorizationCode, OAuthAccessToken, NotificationKind, Notification, ImageStorageLocation, ImageStoragePlacement, ExternalNotificationEvent, ExternalNotificationMethod, RepositoryNotification, - RepositoryAuthorizedEmail] + RepositoryAuthorizedEmail, ImageStorageTransformation, DerivedImageStorage] diff --git a/data/model/legacy.py b/data/model/legacy.py index d9b2079d8..faf9de223 100644 --- a/data/model/legacy.py +++ b/data/model/legacy.py @@ -70,6 +70,10 @@ class InvalidBuildTriggerException(DataModelException): pass +class InvalidImageException(DataModelException): + pass + + class TooManyUsersException(DataModelException): pass @@ -1055,6 +1059,14 @@ def __translate_ancestry(old_ancestry, translations, repository, username, prefe return '/%s/' % '/'.join(new_ids) +def _create_storage(location_name): + storage = ImageStorage.create() + location = ImageStorageLocation.get(name=location_name) + ImageStoragePlacement.create(location=location, storage=storage) + 
storage.locations = {location_name} + return storage + + def find_create_or_link_image(docker_image_id, repository, username, translations, preferred_location): with config.app_config['DB_TRANSACTION_FACTORY'](db): @@ -1093,10 +1105,7 @@ def find_create_or_link_image(docker_image_id, repository, username, translation origin_image_id = to_copy.id except Image.DoesNotExist: logger.debug('Creating new storage for docker id: %s', docker_image_id) - storage = ImageStorage.create() - location = ImageStorageLocation.get(name=preferred_location) - ImageStoragePlacement.create(location=location, storage=storage) - storage.locations = {preferred_location} + storage = _create_storage(preferred_location) logger.debug('Storage locations: %s', storage.locations) @@ -1114,6 +1123,43 @@ def find_create_or_link_image(docker_image_id, repository, username, translation return new_image +def find_or_create_derived_storage(source, transformation_name, preferred_location): + try: + found = (ImageStorage + .select(ImageStorage, DerivedImageStorage) + .join(DerivedImageStorage, on=(ImageStorage.id == DerivedImageStorage.derivative)) + .join(ImageStorageTransformation) + .where(DerivedImageStorage.source == source, + ImageStorageTransformation.name == transformation_name) + .get()) + + found.locations = {placement.location.name for placement in found.imagestorageplacement_set} + return found + except ImageStorage.DoesNotExist: + logger.debug('Creating storage dervied from source: %s', source.uuid) + trans = ImageStorageTransformation.get(name=transformation_name) + new_storage = _create_storage(preferred_location) + DerivedImageStorage.create(source=source, derivative=new_storage, transformation=trans) + return new_storage + + +def get_storage_by_uuid(storage_uuid): + placements = list(ImageStoragePlacement + .select(ImageStoragePlacement, ImageStorage, ImageStorageLocation) + .join(ImageStorageLocation) + .switch(ImageStoragePlacement) + .join(ImageStorage) + .where(ImageStorage.uuid == storage_uuid)) + + if not placements: + raise InvalidImageException('No storage found with uuid: %s', storage_uuid) + + found = placements[0].storage + found.locations = {placement.location.name for placement in placements} + + return found + + def set_image_size(docker_image_id, namespace_name, repository_name, image_size): try: @@ -1252,15 +1298,8 @@ def garbage_collect_repository(namespace_name, repository_name): image_to_remove.delete_instance() - if uuids_to_check_for_gc: - storage_to_remove = (ImageStorage - .select() - .join(Image, JOIN_LEFT_OUTER) - .group_by(ImageStorage) - .where(ImageStorage.uuid << list(uuids_to_check_for_gc)) - .having(fn.Count(Image.id) == 0)) - - for storage in storage_to_remove: + def remove_storages(query): + for storage in query: logger.debug('Garbage collecting image storage: %s', storage.uuid) image_path = config.store.image_path(storage.uuid) @@ -1269,7 +1308,24 @@ def garbage_collect_repository(namespace_name, repository_name): placement.delete_instance() config.store.remove({location_name}, image_path) - storage.delete_instance() + storage.delete_instance(recursive=True) + + if uuids_to_check_for_gc: + storage_to_remove = (ImageStorage + .select() + .join(Image, JOIN_LEFT_OUTER) + .group_by(ImageStorage) + .where(ImageStorage.uuid << list(uuids_to_check_for_gc)) + .having(fn.Count(Image.id) == 0)) + + remove_storages(storage_to_remove) + + # Now remove any derived image storages whose sources have been removed + derived_storages_to_remove = (ImageStorage + .select() + 
.join(DerivedImageStorage, on=(ImageStorage.id == DerivedImageStorage.derivative)) + .where(DerivedImageStorage.source >> None)) + remove_storages(derived_storages_to_remove) return len(to_remove) diff --git a/endpoints/verbs.py b/endpoints/verbs.py index 1985f47d4..92d523537 100644 --- a/endpoints/verbs.py +++ b/endpoints/verbs.py @@ -1,26 +1,24 @@ import logging import json -import hashlib -from flask import (make_response, request, session, Response, redirect, - Blueprint, abort, send_file, make_response) +from flask import redirect, Blueprint, abort, send_file from app import storage as store, app from auth.auth import process_auth from auth.permissions import ReadRepositoryPermission from data import model -from endpoints.registry import set_cache_headers +from data import database from util.queuefile import QueueFile from util.queueprocess import QueueProcess from util.gzipwrap import GzipWrap from util.streamlayerformat import StreamLayerMerger -from werkzeug.wsgi import wrap_file verbs = Blueprint('verbs', __name__) logger = logging.getLogger(__name__) + def _open_stream(namespace, repository, image_list): def get_next_layer(): for current_image_id in image_list: @@ -32,20 +30,25 @@ def _open_stream(namespace, repository, image_list): logger.debug('Returning image layer %s: %s' % (current_image_id, current_image_path)) yield current_image_stream + database.configure(app.config) stream = GzipWrap(StreamLayerMerger(get_next_layer).get_generator()) return stream.read -def _write_synthetic_image_to_storage(namespace, repository, locations, - synthetic_image_id, queue_file): - # TODO: make sure this synthetic image expires! - image_path = store.image_layer_path(synthetic_image_id) - store.stream_write(locations, image_path, queue_file) + +def _write_synthetic_image_to_storage(linked_storage_uuid, linked_locations, queue_file): + image_path = store.image_layer_path(linked_storage_uuid) + store.stream_write(linked_locations, image_path, queue_file) queue_file.close() + database.configure(app.config) + done_uploading = model.get_storage_by_uuid(linked_storage_uuid) + done_uploading.uploading = False + done_uploading.save() + + @verbs.route('////squash', methods=['GET']) @process_auth -@set_cache_headers -def get_squashed_tag(namespace, repository, tag, headers): +def get_squashed_tag(namespace, repository, tag): permission = ReadRepositoryPermission(namespace, repository) if permission.can() or model.repository_is_public(namespace, repository): # Lookup the requested tag. @@ -57,34 +60,26 @@ def get_squashed_tag(namespace, repository, tag, headers): repo_image = model.get_repo_image(namespace, repository, tag_image.docker_image_id) if not repo_image: abort(404) - - # Calculate a synthetic image ID by hashing the *image storage ID* with our - # secret. This is done to prevent the ID being guessable/overwritable by - # external pushes. - unhashed = str(repo_image.storage.id) + ':' + app.config['SECRET_KEY'] - synthetic_image_id = hashlib.sha256(unhashed).hexdigest() - # Check to see if the synthetic image ID exists in storage. If so, we just return a 302. 
- logger.debug('Looking up synthetic image %s', synthetic_image_id) - - locations = repo_image.storage.locations - saved_image_path = store.image_layer_path(synthetic_image_id) - if store.exists(locations, saved_image_path): - logger.debug('Synthetic image %s exists in storage', synthetic_image_id) - download_url = store.get_direct_download_url(locations, saved_image_path) + derived = model.find_or_create_derived_storage(repo_image.storage, 'squash', + store.preferred_locations[0]) + if not derived.uploading: + logger.debug('Derived image %s exists in storage', derived.uuid) + derived_layer_path = store.image_layer_path(derived.uuid) + download_url = store.get_direct_download_url(derived.locations, derived_layer_path) if download_url: - logger.debug('Redirecting to download URL for synthetic image %s', synthetic_image_id) - return redirect(download_url, code=302) + logger.debug('Redirecting to download URL for derived image %s', derived.uuid) + return redirect(download_url) - logger.debug('Sending cached synthetic image %s', synthetic_image_id) - return send_file(store.stream_read_file(locations, saved_image_path)) + logger.debug('Sending cached derived image %s', derived.uuid) + return send_file(store.stream_read_file(derived.locations, derived_layer_path)) # Load the ancestry for the image. - logger.debug('Building and returning synthetic image %s', synthetic_image_id) + logger.debug('Building and returning derived image %s', derived.uuid) uuid = repo_image.storage.uuid ancestry_data = store.get_content(repo_image.storage.locations, store.image_ancestry_path(uuid)) full_image_list = json.loads(ancestry_data) - + # Create a queue process to generate the data. The queue files will read from the process # and send the results to the client and storage. args = (namespace, repository, full_image_list) @@ -92,12 +87,12 @@ def get_squashed_tag(namespace, repository, tag, headers): client_queue_file = QueueFile(queue_process.create_queue(), 'client') storage_queue_file = QueueFile(queue_process.create_queue(), 'storage') - + # Start building. queue_process.run() # Start the storage saving. - storage_args = (namespace, repository, locations, synthetic_image_id, storage_queue_file) + storage_args = (derived.uuid, derived.locations, storage_queue_file) QueueProcess.run_process(_write_synthetic_image_to_storage, storage_args) # Return the client's data. diff --git a/initdb.py b/initdb.py index 34b1c0a08..26ef28678 100644 --- a/initdb.py +++ b/initdb.py @@ -243,6 +243,8 @@ def initialize_database(): ImageStorageLocation.create(name='local_eu') ImageStorageLocation.create(name='local_us') + ImageStorageTransformation.create(name='squash') + # NOTE: These MUST be copied over to NotificationKind, since every external # notification can also generate a Quay.io notification. 
ExternalNotificationEvent.create(name='repo_push') diff --git a/requirements-nover.txt b/requirements-nover.txt index a3c74e89b..262e0594d 100644 --- a/requirements-nover.txt +++ b/requirements-nover.txt @@ -36,3 +36,4 @@ psycopg2 pyyaml git+https://github.com/DevTable/aniso8601-fake.git git+https://github.com/DevTable/anunidecode.git +gipc \ No newline at end of file From e273dca4b499636c273d11e3c858fdbcbce30d36 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Fri, 19 Sep 2014 12:22:54 -0400 Subject: [PATCH 11/20] Change back to using a docker load format --- endpoints/verbs.py | 22 ++++++-- static/js/controllers.js | 8 +-- util/dockerloadformat.py | 115 ++++++++++++++++++++++++++++++++++++++ util/streamlayerformat.py | 46 +++++++-------- 4 files changed, 160 insertions(+), 31 deletions(-) create mode 100644 util/dockerloadformat.py diff --git a/endpoints/verbs.py b/endpoints/verbs.py index 92d523537..8cc421098 100644 --- a/endpoints/verbs.py +++ b/endpoints/verbs.py @@ -1,5 +1,6 @@ import logging import json +import hashlib from flask import redirect, Blueprint, abort, send_file @@ -12,14 +13,18 @@ from data import database from util.queuefile import QueueFile from util.queueprocess import QueueProcess from util.gzipwrap import GzipWrap -from util.streamlayerformat import StreamLayerMerger +from util.dockerloadformat import build_docker_load_stream verbs = Blueprint('verbs', __name__) logger = logging.getLogger(__name__) -def _open_stream(namespace, repository, image_list): +def _open_stream(namespace, repository, tag, synthetic_image_id, image_json, image_list): + def get_next_image(): + for current_image_id in image_list: + yield model.get_repo_image(namespace, repository, current_image_id) + def get_next_layer(): for current_image_id in image_list: current_image_entry = model.get_repo_image(namespace, repository, current_image_id) @@ -31,7 +36,9 @@ def _open_stream(namespace, repository, image_list): yield current_image_stream database.configure(app.config) - stream = GzipWrap(StreamLayerMerger(get_next_layer).get_generator()) + stream = build_docker_load_stream(namespace, repository, tag, synthetic_image_id, image_json, + get_next_image, get_next_layer) + return stream.read @@ -80,9 +87,16 @@ def get_squashed_tag(namespace, repository, tag): ancestry_data = store.get_content(repo_image.storage.locations, store.image_ancestry_path(uuid)) full_image_list = json.loads(ancestry_data) + # Load the image's JSON layer. + image_json_data = store.get_content(repo_image.storage.locations, store.image_json_path(uuid)) + image_json = json.loads(image_json_data) + + # Calculate a synthetic image ID. + synthetic_image_id = hashlib.sha256(tag_image.docker_image_id + ':squash').hexdigest() + # Create a queue process to generate the data. The queue files will read from the process # and send the results to the client and storage. 
- args = (namespace, repository, full_image_list) + args = (namespace, repository, tag, synthetic_image_id, image_json, full_image_list) queue_process = QueueProcess(_open_stream, 8 * 1024, 10 * 1024 * 1024, args) # 8K/10M chunk/max client_queue_file = QueueFile(queue_process.create_queue(), 'client') diff --git a/static/js/controllers.js b/static/js/controllers.js index f781f4dac..d4ef7fe56 100644 --- a/static/js/controllers.js +++ b/static/js/controllers.js @@ -422,10 +422,10 @@ function RepoCtrl($scope, $sanitize, Restangular, ImageMetadataService, ApiServi }); if ($scope.currentTag) { - var squash = 'docker import ' + Config.getHost('ACCOUNTNAME:PASSWORDORTOKEN'); - squash += '/verbs/v1/' + namespace + '/' + name + '/' + $scope.currentTag.name + '/squash'; - squash += ' '; - squash += Config.getDomain() + '/' + namespace + '/' + name + '/' + $scope.currentTag.name + '.squash'; + var squash = 'curl ' + Config.getHost('ACCOUNTNAME:PASSWORDORTOKEN'); + squash += '/verbs/v1/repositories/' + namespace + '/' + name + '/'; + squash += $scope.currentTag.name + '/squash'; + squash += ' | docker load'; $scope.pullCommands.push({ 'title': 'Squashed image (Tag ' + $scope.currentTag.name + ')', diff --git a/util/dockerloadformat.py b/util/dockerloadformat.py new file mode 100644 index 000000000..2979bc70b --- /dev/null +++ b/util/dockerloadformat.py @@ -0,0 +1,115 @@ +from util.gzipwrap import GzipWrap +from util.streamlayerformat import StreamLayerMerger +from app import app + +import copy +import json +import tarfile + +def build_docker_load_stream(namespace, repository, tag, synthetic_image_id, + layer_json, get_image_iterator, get_layer_iterator): + """ Builds and streams a synthetic .tar.gz that represents a squashed version + of the given layers, in `docker load` V1 format. + """ + return GzipWrap(_import_format_generator(namespace, repository, tag, + synthetic_image_id, layer_json, + get_image_iterator, get_layer_iterator)) + + +def _import_format_generator(namespace, repository, tag, synthetic_image_id, + layer_json, get_image_iterator, get_layer_iterator): + + # Docker import V1 Format (.tar): + # repositories - JSON file containing a repo -> tag -> image map + # {image ID folder}: + # json - The layer JSON + # layer.tar - The TARed contents of the layer + # VERSION - The docker import version: '1.0' + layer_merger = StreamLayerMerger(get_layer_iterator) + + # Yield the repositories file: + synthetic_layer_info = {} + synthetic_layer_info[tag + '.squash'] = synthetic_image_id + + hostname = app.config['SERVER_HOSTNAME'] + repositories = {} + repositories[hostname + '/' + namespace + '/' + repository] = synthetic_layer_info + + yield _tar_file('repositories', json.dumps(repositories)) + + # Yield the image ID folder. + yield _tar_folder(synthetic_image_id) + + # Yield the JSON layer data. + layer_json = _build_layer_json(layer_json, synthetic_image_id) + yield _tar_file(synthetic_image_id + '/json', json.dumps(layer_json)) + + # Yield the VERSION file. + yield _tar_file(synthetic_image_id + '/VERSION', '1.0') + + # Yield the merged layer data's header. + estimated_file_size = 0 + for image in get_image_iterator(): + estimated_file_size += image.storage.uncompressed_size or 0 + + yield _tar_file_header(synthetic_image_id + '/layer.tar', estimated_file_size) + + # Yield the contents of the merged layer. 
+ yielded_size = 0 + for entry in layer_merger.get_generator(): + yield entry + yielded_size += len(entry) + + # If the yielded size is less than the estimated size (which is likely), fill the rest with + # zeros. + if yielded_size < estimated_file_size: + yield '\0' * (estimated_file_size - yielded_size) + + # Yield any file padding to 512 bytes that is necessary. + yield _tar_file_padding(estimated_file_size) + + # Last two records are empty in TAR spec. + yield '\0' * 512 + yield '\0' * 512 + + +def _build_layer_json(layer_json, synthetic_image_id): + updated_json = copy.deepcopy(layer_json) + updated_json['id'] = synthetic_image_id + + if 'parent' in updated_json: + del updated_json['parent'] + + if 'config' in updated_json and 'Image' in updated_json['config']: + updated_json['config']['Image'] = synthetic_image_id + + if 'container_config' in updated_json and 'Image' in updated_json['container_config']: + updated_json['container_config']['Image'] = synthetic_image_id + + return updated_json + + +def _tar_file(name, contents): + length = len(contents) + tar_data = _tar_file_header(name, length) + tar_data += contents + tar_data += _tar_file_padding(length) + return tar_data + + +def _tar_file_padding(length): + if length % 512 != 0: + return '\0' * (512 - (length % 512)) + + +def _tar_file_header(name, file_size): + info = tarfile.TarInfo(name=name) + info.type = tarfile.REGTYPE + info.size = file_size + return info.tobuf() + + +def _tar_folder(name): + info = tarfile.TarInfo(name=name) + info.type = tarfile.DIRTYPE + return info.tobuf() diff --git a/util/streamlayerformat.py b/util/streamlayerformat.py index 757d1b4ef..e8ae3eb3d 100644 --- a/util/streamlayerformat.py +++ b/util/streamlayerformat.py @@ -32,30 +32,30 @@ class StreamLayerMerger(object): chunk_size = 1024 * 1024 * 9 for tar_info in tar_file: - result = self.process_tar_info(tar_info) - if not result: + if not self.check_tar_info(tar_info): continue - (tarinfo, filebuf) = result + # Yield the tar header. + yield tar_info.tobuf() - yield tarinfo.tobuf() + # Try to extract any file contents for the tar. If found, we yield them as well. + if tar_info.isreg(): + file_stream = tar_file.extractfile(tar_info) + if file_stream is not None: + length = 0 + while True: + current_block = file_stream.read(chunk_size) + if not len(current_block): + break - if filebuf: - length = 0 - file_stream = tar_file.extractfile(tarinfo) - while True: - current_block = file_stream.read(chunk_size) - if not len(current_block): - break + yield current_block + length += len(current_block) - yield current_block - length += len(current_block) + file_stream.close() - file_stream.close() - - # Files must be padding to 512 byte multiples. - if length % 512 != 0: - yield '\0' * (512 - (length % 512)) + # Files must be padding to 512 byte multiples. + if length % 512 != 0: + yield '\0' * (512 - (length % 512)) # Close the layer stream now that we're done with it. tar_file.close() @@ -68,24 +68,24 @@ class StreamLayerMerger(object): yield '\0' * 512 - def process_tar_info(self, tar_info): + def check_tar_info(self, tar_info): absolute = os.path.relpath(tar_info.name.decode('utf-8'), './') # Skip metadata. if is_aufs_metadata(absolute): - return None + return False # Add any prefix of deleted paths to the prefix list. deleted_prefix = get_deleted_prefix(absolute) if deleted_prefix is not None: self.encountered.append(deleted_prefix) - return None + return False # Check if this file has already been encountered somewhere. If so, # skip it. 
if unicode(absolute) in self.trie: - return None + return False # Otherwise, add the path to the encountered list and return it. self.encountered.append(absolute) - return (tar_info, tar_info.isfile() or tar_info.isdev()) + return True From 9003670826ab682055f234490b877360e2c32d2f Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Fri, 19 Sep 2014 12:25:02 -0400 Subject: [PATCH 12/20] Adjust the CSS a bit for displaying the pull box in the repo view --- static/css/quay.css | 2 ++ 1 file changed, 2 insertions(+) diff --git a/static/css/quay.css b/static/css/quay.css index 4b559a104..15c71634b 100644 --- a/static/css/quay.css +++ b/static/css/quay.css @@ -2227,6 +2227,8 @@ p.editable:hover i { margin-right: -3px; background: #f8f8f8; outline: none; + border-top-left-radius: 4px; + border-bottom-left-radius: 4px; } .repo .pull-container .pull-selector i { From e5055763f60960880e9eaa649c847eb03549dcd1 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Fri, 19 Sep 2014 12:54:52 -0400 Subject: [PATCH 13/20] Make the squashed path smaller and handle failure cases on the curl side --- application.py | 2 +- endpoints/verbs.py | 2 +- static/js/controllers.js | 5 ++--- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/application.py b/application.py index c11d8e9dc..e7f8548f8 100644 --- a/application.py +++ b/application.py @@ -44,7 +44,7 @@ application.register_blueprint(callback, url_prefix='/oauth2') application.register_blueprint(index, url_prefix='/v1') application.register_blueprint(tags, url_prefix='/v1') application.register_blueprint(registry, url_prefix='/v1') -application.register_blueprint(verbs, url_prefix='/verbs/v1/repositories') +application.register_blueprint(verbs, url_prefix='/c1') application.register_blueprint(api_bp, url_prefix='/api') application.register_blueprint(webhooks, url_prefix='/webhooks') application.register_blueprint(realtime, url_prefix='/realtime') diff --git a/endpoints/verbs.py b/endpoints/verbs.py index 8cc421098..91b9f30a9 100644 --- a/endpoints/verbs.py +++ b/endpoints/verbs.py @@ -53,7 +53,7 @@ def _write_synthetic_image_to_storage(linked_storage_uuid, linked_locations, que done_uploading.save() -@verbs.route('////squash', methods=['GET']) +@verbs.route('/squash///', methods=['GET']) @process_auth def get_squashed_tag(namespace, repository, tag): permission = ReadRepositoryPermission(namespace, repository) diff --git a/static/js/controllers.js b/static/js/controllers.js index d4ef7fe56..789b07b10 100644 --- a/static/js/controllers.js +++ b/static/js/controllers.js @@ -422,9 +422,8 @@ function RepoCtrl($scope, $sanitize, Restangular, ImageMetadataService, ApiServi }); if ($scope.currentTag) { - var squash = 'curl ' + Config.getHost('ACCOUNTNAME:PASSWORDORTOKEN'); - squash += '/verbs/v1/repositories/' + namespace + '/' + name + '/'; - squash += $scope.currentTag.name + '/squash'; + var squash = 'curl -f ' + Config.getHost('ACCOUNTNAME:PASSWORDORTOKEN'); + squash += '/c1/squash/' + namespace + '/' + name + '/' + $scope.currentTag.name; squash += ' | docker load'; $scope.pullCommands.push({ From 70e0aba257c443a4d11d1142a53d24fad0050e27 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Mon, 22 Sep 2014 14:36:52 -0400 Subject: [PATCH 14/20] Add a script for generating schema migrations. Should be run from the root quay directory. 
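
A hypothetical invocation, assuming it is run from the repository root with a local
Docker daemon available (the env.py change expects the MySQL container at the
boot2docker-style address 192.168.59.103); the quoted argument becomes the Alembic
revision message passed to 'alembic revision --autogenerate -m':

  data/migrations/generate-schema-migration.sh "add support for squashing images"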
--- data/migrations/env.py | 11 ++++++++-- data/migrations/generate-schema-migration.sh | 21 ++++++++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) create mode 100755 data/migrations/generate-schema-migration.sh diff --git a/data/migrations/env.py b/data/migrations/env.py index d64cf4ee7..5f7bb986d 100644 --- a/data/migrations/env.py +++ b/data/migrations/env.py @@ -1,4 +1,7 @@ from __future__ import with_statement + +import os + from alembic import context from sqlalchemy import engine_from_config, pool from logging.config import fileConfig @@ -12,8 +15,12 @@ from util.morecollections import AttrDict # this is the Alembic Config object, which provides # access to the values within the .ini file in use. +db_uri = unquote(app.config['DB_URI']) +if 'GENMIGRATE' in os.environ: + db_uri = 'mysql+pymysql://root:password@192.168.59.103/genschema' + config = context.config -config.set_main_option('sqlalchemy.url', unquote(app.config['DB_URI'])) +config.set_main_option('sqlalchemy.url', db_uri) # Interpret the config file for Python logging. # This line sets up loggers basically. @@ -57,7 +64,7 @@ def run_migrations_online(): """ - if isinstance(db.obj, SqliteDatabase): + if isinstance(db.obj, SqliteDatabase) and not 'GENMIGRATE' in os.environ: print ('Skipping Sqlite migration!') return diff --git a/data/migrations/generate-schema-migration.sh b/data/migrations/generate-schema-migration.sh new file mode 100755 index 000000000..c7db14b32 --- /dev/null +++ b/data/migrations/generate-schema-migration.sh @@ -0,0 +1,21 @@ +set -e + +# Run a MySQL database on port 3306 inside of Docker. +docker run --name mysql -p 3306:3306 -e MYSQL_ROOT_PASSWORD=password -d mysql + +# Sleep for 5s to get MySQL get started. +echo 'Sleeping for 5...' +sleep 5 + +# Add the database to mysql. +docker run --link mysql:mysql mysql sh -c 'echo "create database genschema" | mysql -h"$MYSQL_PORT_3306_TCP_ADDR" -P"$MYSQL_PORT_3306_TCP_PORT" -uroot -ppassword' + +# Generate a SQLite database with the schema as defined by the existing alembic model. +GENMIGRATE=true PYTHONPATH=. alembic upgrade head + +# Generate the migration to the current model. +GENMIGRATE=true PYTHONPATH=. alembic revision --autogenerate -m "$@" + +# Kill the MySQL instance. +docker kill mysql +docker rm mysql \ No newline at end of file From f16878cce91aa9a7d8c1b7626759abb564533058 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Mon, 22 Sep 2014 14:38:42 -0400 Subject: [PATCH 15/20] Add migration for synthetic image tables --- ...399bd2_add_support_for_squashing_images.py | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 data/migrations/versions/3f6d26399bd2_add_support_for_squashing_images.py diff --git a/data/migrations/versions/3f6d26399bd2_add_support_for_squashing_images.py b/data/migrations/versions/3f6d26399bd2_add_support_for_squashing_images.py new file mode 100644 index 000000000..043d68db8 --- /dev/null +++ b/data/migrations/versions/3f6d26399bd2_add_support_for_squashing_images.py @@ -0,0 +1,57 @@ +"""add support for squashing images + +Revision ID: 3f6d26399bd2 +Revises: 34fd69f63809 +Create Date: 2014-09-22 14:37:30.821785 + +""" + +# revision identifiers, used by Alembic. +revision = '3f6d26399bd2' +down_revision = '34fd69f63809' + +from alembic import op +import sqlalchemy as sa + + +def upgrade(tables): + ### commands auto generated by Alembic - please adjust! 
### + op.create_table('imagestoragetransformation', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('name', sa.String(length=255), nullable=False), + sa.PrimaryKeyConstraint('id') + ) + op.create_index('imagestoragetransformation_name', 'imagestoragetransformation', ['name'], unique=True) + op.create_table('derivedimagestorage', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('source_id', sa.Integer(), nullable=True), + sa.Column('derivative_id', sa.Integer(), nullable=False), + sa.Column('transformation_id', sa.Integer(), nullable=False), + sa.ForeignKeyConstraint(['derivative_id'], ['imagestorage.id'], ), + sa.ForeignKeyConstraint(['source_id'], ['imagestorage.id'], ), + sa.ForeignKeyConstraint(['transformation_id'], ['imagestoragetransformation.id'], ), + sa.PrimaryKeyConstraint('id') + ) + op.create_index('derivedimagestorage_derivative_id', 'derivedimagestorage', ['derivative_id'], unique=False) + op.create_index('derivedimagestorage_source_id', 'derivedimagestorage', ['source_id'], unique=False) + op.create_index('derivedimagestorage_source_id_transformation_id', 'derivedimagestorage', ['source_id', 'transformation_id'], unique=True) + op.create_index('derivedimagestorage_transformation_id', 'derivedimagestorage', ['transformation_id'], unique=False) + op.drop_index('image_repository_id_docker_image_id', table_name='image') + op.create_index('image_repository_id_docker_image_id', 'image', ['repository_id', 'docker_image_id'], unique=True) + op.add_column(u'imagestorage', sa.Column('uncompressed_size', sa.BigInteger(), nullable=True)) + ### end Alembic commands ### + + +def downgrade(tables): + ### commands auto generated by Alembic - please adjust! ### + op.drop_column(u'imagestorage', 'uncompressed_size') + op.drop_index('image_repository_id_docker_image_id', table_name='image') + op.create_index('image_repository_id_docker_image_id', 'image', ['repository_id', 'docker_image_id'], unique=False) + op.drop_index('derivedimagestorage_transformation_id', table_name='derivedimagestorage') + op.drop_index('derivedimagestorage_source_id_transformation_id', table_name='derivedimagestorage') + op.drop_index('derivedimagestorage_source_id', table_name='derivedimagestorage') + op.drop_index('derivedimagestorage_derivative_id', table_name='derivedimagestorage') + op.drop_table('derivedimagestorage') + op.drop_index('imagestoragetransformation_name', table_name='imagestoragetransformation') + op.drop_table('imagestoragetransformation') + ### end Alembic commands ### From 1658475ac155ccc214ba9fd82c8e628b6221dcae Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Mon, 22 Sep 2014 14:39:44 -0400 Subject: [PATCH 16/20] Previous revision should not have the image storage col in it --- .../versions/3f6d26399bd2_add_support_for_squashing_images.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/data/migrations/versions/3f6d26399bd2_add_support_for_squashing_images.py b/data/migrations/versions/3f6d26399bd2_add_support_for_squashing_images.py index 043d68db8..932ffde51 100644 --- a/data/migrations/versions/3f6d26399bd2_add_support_for_squashing_images.py +++ b/data/migrations/versions/3f6d26399bd2_add_support_for_squashing_images.py @@ -38,13 +38,11 @@ def upgrade(tables): op.create_index('derivedimagestorage_transformation_id', 'derivedimagestorage', ['transformation_id'], unique=False) op.drop_index('image_repository_id_docker_image_id', table_name='image') op.create_index('image_repository_id_docker_image_id', 'image', ['repository_id', 'docker_image_id'], unique=True) - 
op.add_column(u'imagestorage', sa.Column('uncompressed_size', sa.BigInteger(), nullable=True)) ### end Alembic commands ### def downgrade(tables): ### commands auto generated by Alembic - please adjust! ### - op.drop_column(u'imagestorage', 'uncompressed_size') op.drop_index('image_repository_id_docker_image_id', table_name='image') op.create_index('image_repository_id_docker_image_id', 'image', ['repository_id', 'docker_image_id'], unique=False) op.drop_index('derivedimagestorage_transformation_id', table_name='derivedimagestorage') From 297c8ad29c25ee1785e785458a96131dbba5d6d3 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Mon, 22 Sep 2014 15:04:28 -0400 Subject: [PATCH 17/20] Add migration to backfill uncompressed image sizes on the storage --- ...fcf_add_the_uncompressed_size_to_image_.py | 29 +++++++++++++++++ tools/uncompressedsize.py | 31 +++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 data/migrations/versions/6f2ecf5afcf_add_the_uncompressed_size_to_image_.py create mode 100644 tools/uncompressedsize.py diff --git a/data/migrations/versions/6f2ecf5afcf_add_the_uncompressed_size_to_image_.py b/data/migrations/versions/6f2ecf5afcf_add_the_uncompressed_size_to_image_.py new file mode 100644 index 000000000..1081df651 --- /dev/null +++ b/data/migrations/versions/6f2ecf5afcf_add_the_uncompressed_size_to_image_.py @@ -0,0 +1,29 @@ +"""add the uncompressed size to image storage + +Revision ID: 6f2ecf5afcf +Revises: 3f6d26399bd2 +Create Date: 2014-09-22 14:39:13.470566 + +""" + +# revision identifiers, used by Alembic. +revision = '6f2ecf5afcf' +down_revision = '3f6d26399bd2' + +from alembic import op +from tools.uncompressedsize import backfill_sizes +import sqlalchemy as sa + + +def upgrade(tables): + ### commands auto generated by Alembic - please adjust! ### + op.add_column('imagestorage', sa.Column('uncompressed_size', sa.BigInteger(), nullable=True)) + ### end Alembic commands ### + + # Backfill the uncompressed size to the image storage table. + backfill_sizes() + +def downgrade(tables): + ### commands auto generated by Alembic - please adjust! ### + op.drop_column('imagestorage', 'uncompressed_size') + ### end Alembic commands ### diff --git a/tools/uncompressedsize.py b/tools/uncompressedsize.py new file mode 100644 index 000000000..530ba3836 --- /dev/null +++ b/tools/uncompressedsize.py @@ -0,0 +1,31 @@ +from data import model +from data.database import ImageStorage +from app import app, storage as store + +import logging + +def backfill_sizes(): + count = ImageStorage.select().where(ImageStorage.uncompressed_size == None).count() + counter = 0 + for image_storage in ImageStorage.select().where(ImageStorage.uncompressed_size == None): + logging.debug("Backfilling uncompressed size: %s of %s" % (counter, count)) + + # Lookup the JSON for the image. 
+ uuid = image_storage.uuid + with_locations = model.get_storage_by_uuid(uuid) + + json_data = store.get_content(with_locations.locations, store.image_json_path(uuid)) + size = json_data.get('Size', None) + if size is None: + continue + + image_storage.uncompressed_size = size + image_storage.save() + counter += 1 + + +if __name__ == "__main__": + logging.basicConfig(level=logging.DEBUG) + logging.getLogger('boto').setLevel(logging.CRITICAL) + + backfill_sizes() \ No newline at end of file From 746936ce663583d1f5d572661e22493464c79886 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Mon, 29 Sep 2014 12:54:22 -0400 Subject: [PATCH 18/20] - Make the layer going over the estimated size raise an exception - Add a heuristic for estimating the layer size if it is 0 - Add a method where we can add a custom defined map of image -> size --- tools/uncompressedsize.py | 31 ------------------------------- util/dockerloadformat.py | 27 ++++++++++++++++++++++++++- util/queuefile.py | 6 +++++- util/queueprocess.py | 8 ++++++-- 4 files changed, 37 insertions(+), 35 deletions(-) delete mode 100644 tools/uncompressedsize.py diff --git a/tools/uncompressedsize.py b/tools/uncompressedsize.py deleted file mode 100644 index 530ba3836..000000000 --- a/tools/uncompressedsize.py +++ /dev/null @@ -1,31 +0,0 @@ -from data import model -from data.database import ImageStorage -from app import app, storage as store - -import logging - -def backfill_sizes(): - count = ImageStorage.select().where(ImageStorage.uncompressed_size == None).count() - counter = 0 - for image_storage in ImageStorage.select().where(ImageStorage.uncompressed_size == None): - logging.debug("Backfilling uncompressed size: %s of %s" % (counter, count)) - - # Lookup the JSON for the image. - uuid = image_storage.uuid - with_locations = model.get_storage_by_uuid(uuid) - - json_data = store.get_content(with_locations.locations, store.image_json_path(uuid)) - size = json_data.get('Size', None) - if size is None: - continue - - image_storage.uncompressed_size = size - image_storage.save() - counter += 1 - - -if __name__ == "__main__": - logging.basicConfig(level=logging.DEBUG) - logging.getLogger('boto').setLevel(logging.CRITICAL) - - backfill_sizes() \ No newline at end of file diff --git a/util/dockerloadformat.py b/util/dockerloadformat.py index 2979bc70b..ed6fecc52 100644 --- a/util/dockerloadformat.py +++ b/util/dockerloadformat.py @@ -6,6 +6,14 @@ import copy import json import tarfile +class FileEstimationException(Exception): + """ Exception raised by build_docker_load_stream if the estimated size of the layer TAR + was lower than the actual size. This means the sent TAR header is wrong, and we have + to fail. + """ + pass + + def build_docker_load_stream(namespace, repository, tag, synthetic_image_id, layer_json, get_image_iterator, get_layer_iterator): """ Builds and streams a synthetic .tar.gz that represents a squashed version @@ -50,7 +58,9 @@ def _import_format_generator(namespace, repository, tag, synthetic_image_id, # Yield the merged layer data's header. 
estimated_file_size = 0 for image in get_image_iterator(): - estimated_file_size += image.storage.uncompressed_size or 0 + estimated_file_size += (image.storage.uncompressed_size or + _get_mapped_size(image) or + _estimate_size(image)) yield _tar_file_header(synthetic_image_id + '/layer.tar', estimated_file_size) @@ -60,6 +70,11 @@ def _import_format_generator(namespace, repository, tag, synthetic_image_id, yield entry yielded_size += len(entry) + # If the yielded size is more than the estimated size (which is unlikely but possible), then + # raise an exception since the tar header will be wrong. + if yielded_size > estimated_file_size: + raise FileEstimationException() + # If the yielded size is less than the estimated size (which is likely), fill the rest with # zeros. if yielded_size < estimated_file_size: @@ -113,3 +128,13 @@ def _tar_folder(name): info = tarfile.TarInfo(name=name) info.type = tarfile.DIRTYPE return info.tobuf() + +def _get_mapped_size(image): + """ Returns a predefined image size for the given image or None if not found. """ + return None + +def _estimate_size(image): + """ Estimates a file size based on a heuristic. """ + # More than 1 SD away from the size difference in the DB, as of 9/29/2014 + return image.storage.image_size * 12 + diff --git a/util/queuefile.py b/util/queuefile.py index 9c64c26fb..ee8ea20c1 100644 --- a/util/queuefile.py +++ b/util/queuefile.py @@ -22,7 +22,11 @@ class QueueFile(object): if result is None: self._done = True break - + + if isinstance(result, Exception): + self._closed = True + raise result + self._buffer += result self._total_size += len(result) diff --git a/util/queueprocess.py b/util/queueprocess.py index bf8ecb280..55c552422 100644 --- a/util/queueprocess.py +++ b/util/queueprocess.py @@ -40,7 +40,11 @@ class QueueProcess(object): def _run(get_producer, queues, chunk_size, args): producer = get_producer(*args) while True: - data = producer(chunk_size) or None + try: + data = producer(chunk_size) or None + except Exception as ex: + data = ex + for queue in queues: try: queue.put(data, block=True, timeout=10) @@ -48,7 +52,7 @@ def _run(get_producer, queues, chunk_size, args): # One of the listeners stopped listening. return - if data is None: + if data is None or isinstance(data, Exception): break # Important! This allows the thread that writes the queue data to the pipe From 3d3f2dd6d7cf472a02355bfbc2005ed58c76779a Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Mon, 29 Sep 2014 15:33:26 -0400 Subject: [PATCH 19/20] Remove map call and estimate call --- util/dockerloadformat.py | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/util/dockerloadformat.py b/util/dockerloadformat.py index ed6fecc52..350753753 100644 --- a/util/dockerloadformat.py +++ b/util/dockerloadformat.py @@ -58,9 +58,7 @@ def _import_format_generator(namespace, repository, tag, synthetic_image_id, # Yield the merged layer data's header. estimated_file_size = 0 for image in get_image_iterator(): - estimated_file_size += (image.storage.uncompressed_size or - _get_mapped_size(image) or - _estimate_size(image)) + estimated_file_size += image.storage.uncompressed_size yield _tar_file_header(synthetic_image_id + '/layer.tar', estimated_file_size) @@ -127,14 +125,4 @@ def _tar_file_header(name, file_size): def _tar_folder(name): info = tarfile.TarInfo(name=name) info.type = tarfile.DIRTYPE - return info.tobuf() - -def _get_mapped_size(image): - """ Returns a predefined image size for the given image or None if not found. 
""" - return None - -def _estimate_size(image): - """ Estimates a file size based on a heuristic. """ - # More than 1 SD away from the size difference in the DB, as of 9/29/2014 - return image.storage.image_size * 12 - + return info.tobuf() From f4daa5e97b0d20b6602f30a6ec4eb0b70544cc57 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Tue, 7 Oct 2014 15:29:56 -0400 Subject: [PATCH 20/20] - Update the migrations tool to verify migrations work up and down for both MySQL and PostgresSQL. - Add migrations for the squashed image tables and for backfilling the uncompressed sizes - Make sure gzip stream uses a max length when determining the uncompressed size --- data/database.py | 2 +- data/migrations/env.py | 7 +- data/migrations/generate-schema-migration.sh | 21 ----- data/migrations/migration.sh | 82 ++++++++++++++++++ ...5_calculate_uncompressed_sizes_for_all_.py | 22 +++++ ...4461dc_add_support_for_squashed_images.py} | 36 ++++---- data/model/legacy.py | 2 +- test/data/test.db | Bin 626688 -> 684032 bytes util/gzipstream.py | 9 +- {tools => util}/uncompressedsize.py | 14 ++- 10 files changed, 152 insertions(+), 43 deletions(-) delete mode 100755 data/migrations/generate-schema-migration.sh create mode 100755 data/migrations/migration.sh create mode 100644 data/migrations/versions/2430f55c41d5_calculate_uncompressed_sizes_for_all_.py rename data/migrations/versions/{3f6d26399bd2_add_support_for_squashing_images.py => 3b4d3a4461dc_add_support_for_squashed_images.py} (64%) rename {tools => util}/uncompressedsize.py (84%) diff --git a/data/database.py b/data/database.py index 3ad10b7b2..b0b1c031a 100644 --- a/data/database.py +++ b/data/database.py @@ -168,7 +168,7 @@ class Visibility(BaseModel): class Repository(BaseModel): - namespace_user = ForeignKeyField(User) + namespace_user = ForeignKeyField(User, null=True) name = CharField() visibility = ForeignKeyField(Visibility) description = TextField(null=True) diff --git a/data/migrations/env.py b/data/migrations/env.py index 5f7bb986d..f27a483f7 100644 --- a/data/migrations/env.py +++ b/data/migrations/env.py @@ -17,7 +17,12 @@ from util.morecollections import AttrDict # access to the values within the .ini file in use. db_uri = unquote(app.config['DB_URI']) if 'GENMIGRATE' in os.environ: - db_uri = 'mysql+pymysql://root:password@192.168.59.103/genschema' + docker_host = os.environ.get('DOCKER_HOST') + docker_host_ip = docker_host[len('tcp://'):].split(':')[0] + if os.environ.get('GENMIGRATE') == 'mysql': + db_uri = 'mysql+pymysql://root:password@%s/genschema' % (docker_host_ip) + else: + db_uri = 'postgresql://postgres@%s/genschema' % (docker_host_ip) config = context.config config.set_main_option('sqlalchemy.url', db_uri) diff --git a/data/migrations/generate-schema-migration.sh b/data/migrations/generate-schema-migration.sh deleted file mode 100755 index c7db14b32..000000000 --- a/data/migrations/generate-schema-migration.sh +++ /dev/null @@ -1,21 +0,0 @@ -set -e - -# Run a MySQL database on port 3306 inside of Docker. -docker run --name mysql -p 3306:3306 -e MYSQL_ROOT_PASSWORD=password -d mysql - -# Sleep for 5s to get MySQL get started. -echo 'Sleeping for 5...' -sleep 5 - -# Add the database to mysql. -docker run --link mysql:mysql mysql sh -c 'echo "create database genschema" | mysql -h"$MYSQL_PORT_3306_TCP_ADDR" -P"$MYSQL_PORT_3306_TCP_PORT" -uroot -ppassword' - -# Generate a SQLite database with the schema as defined by the existing alembic model. -GENMIGRATE=true PYTHONPATH=. 
alembic upgrade head - -# Generate the migration to the current model. -GENMIGRATE=true PYTHONPATH=. alembic revision --autogenerate -m "$@" - -# Kill the MySQL instance. -docker kill mysql -docker rm mysql \ No newline at end of file diff --git a/data/migrations/migration.sh b/data/migrations/migration.sh new file mode 100755 index 000000000..98a12d6ac --- /dev/null +++ b/data/migrations/migration.sh @@ -0,0 +1,82 @@ +set -e + +up_mysql() { + # Run a SQL database on port 3306 inside of Docker. + docker run --name mysql -p 3306:3306 -e MYSQL_ROOT_PASSWORD=password -d mysql + + # Sleep for 5s to get MySQL get started. + echo 'Sleeping for 5...' + sleep 5 + + # Add the database to mysql. + docker run --rm --link mysql:mysql mysql sh -c 'echo "create database genschema" | mysql -h"$MYSQL_PORT_3306_TCP_ADDR" -P"$MYSQL_PORT_3306_TCP_PORT" -uroot -ppassword' +} + +down_mysql() { + docker kill mysql + docker rm mysql +} + +up_postgres() { + # Run a SQL database on port 5432 inside of Docker. + docker run --name postgres -p 5432:5432 -d postgres + + # Sleep for 5s to get SQL get started. + echo 'Sleeping for 5...' + sleep 5 + + # Add the database to postgres. + docker run --rm --link postgres:postgres postgres sh -c 'echo "create database genschema" | psql -h "$POSTGRES_PORT_5432_TCP_ADDR" -p "$POSTGRES_PORT_5432_TCP_PORT" -U postgres' +} + +down_postgres() { + docker kill postgres + docker rm postgres +} + +gen_migrate() { + # Generate the migration to the current model. + GENMIGRATE=$1 PYTHONPATH=. alembic revision --autogenerate -m "$@" + + # Generate a SQLite database with the schema as defined by the existing alembic model. + GENMIGRATE=$1 PYTHONPATH=. alembic upgrade head +} + +test_migrate() { + # Generate a SQLite database with the schema as defined by the existing alembic model. + GENMIGRATE=$1 PYTHONPATH=. alembic upgrade head + + # Downgrade to verify it works in both directions. + COUNT=`ls data/migrations/versions/*.py | wc -l | tr -d ' '` + GENMIGRATE=$1 PYTHONPATH=. alembic downgrade "-$COUNT" +} + +# Test (and generate, if requested) via MySQL. +echo '> Starting MySQL' +up_mysql + +if [ ! -z "$@" ] + then + set +e + echo '> Generating Migration' + gen_migrate "mysql" + set -e + fi + +echo '> Testing Migration (mysql)' +set +e +test_migrate "mysql" +set -e +down_mysql + +# Test via Postgres. +echo '> Starting Postgres' +up_postgres + +echo '> Testing Migration (postgres)' +set +e +test_migrate "postgres" +set -e +down_postgres + + diff --git a/data/migrations/versions/2430f55c41d5_calculate_uncompressed_sizes_for_all_.py b/data/migrations/versions/2430f55c41d5_calculate_uncompressed_sizes_for_all_.py new file mode 100644 index 000000000..df2ed6f14 --- /dev/null +++ b/data/migrations/versions/2430f55c41d5_calculate_uncompressed_sizes_for_all_.py @@ -0,0 +1,22 @@ +"""Calculate uncompressed sizes for all images + +Revision ID: 2430f55c41d5 +Revises: 3b4d3a4461dc +Create Date: 2014-10-07 14:50:04.660315 + +""" + +# revision identifiers, used by Alembic. 
+revision = '2430f55c41d5' +down_revision = '3b4d3a4461dc' + +from alembic import op +import sqlalchemy as sa +from util.uncompressedsize import backfill_sizes_from_data + + +def upgrade(tables): + backfill_sizes_from_data() + +def downgrade(tables): + pass diff --git a/data/migrations/versions/3f6d26399bd2_add_support_for_squashing_images.py b/data/migrations/versions/3b4d3a4461dc_add_support_for_squashed_images.py similarity index 64% rename from data/migrations/versions/3f6d26399bd2_add_support_for_squashing_images.py rename to data/migrations/versions/3b4d3a4461dc_add_support_for_squashed_images.py index 932ffde51..7d85f0508 100644 --- a/data/migrations/versions/3f6d26399bd2_add_support_for_squashing_images.py +++ b/data/migrations/versions/3b4d3a4461dc_add_support_for_squashed_images.py @@ -1,25 +1,24 @@ -"""add support for squashing images +"""Add support for squashed images -Revision ID: 3f6d26399bd2 -Revises: 34fd69f63809 -Create Date: 2014-09-22 14:37:30.821785 +Revision ID: 3b4d3a4461dc +Revises: b1d41e2071b +Create Date: 2014-10-07 14:49:13.105746 """ # revision identifiers, used by Alembic. -revision = '3f6d26399bd2' -down_revision = '34fd69f63809' +revision = '3b4d3a4461dc' +down_revision = 'b1d41e2071b' from alembic import op import sqlalchemy as sa - def upgrade(tables): ### commands auto generated by Alembic - please adjust! ### op.create_table('imagestoragetransformation', sa.Column('id', sa.Integer(), nullable=False), sa.Column('name', sa.String(length=255), nullable=False), - sa.PrimaryKeyConstraint('id') + sa.PrimaryKeyConstraint('id', name=op.f('pk_imagestoragetransformation')) ) op.create_index('imagestoragetransformation_name', 'imagestoragetransformation', ['name'], unique=True) op.create_table('derivedimagestorage', @@ -27,10 +26,10 @@ def upgrade(tables): sa.Column('source_id', sa.Integer(), nullable=True), sa.Column('derivative_id', sa.Integer(), nullable=False), sa.Column('transformation_id', sa.Integer(), nullable=False), - sa.ForeignKeyConstraint(['derivative_id'], ['imagestorage.id'], ), - sa.ForeignKeyConstraint(['source_id'], ['imagestorage.id'], ), - sa.ForeignKeyConstraint(['transformation_id'], ['imagestoragetransformation.id'], ), - sa.PrimaryKeyConstraint('id') + sa.ForeignKeyConstraint(['derivative_id'], ['imagestorage.id'], name=op.f('fk_derivedimagestorage_derivative_id_imagestorage')), + sa.ForeignKeyConstraint(['source_id'], ['imagestorage.id'], name=op.f('fk_derivedimagestorage_source_id_imagestorage')), + sa.ForeignKeyConstraint(['transformation_id'], ['imagestoragetransformation.id'], name=op.f('fk_dis_transformation_id_ist')), + sa.PrimaryKeyConstraint('id', name=op.f('pk_derivedimagestorage')) ) op.create_index('derivedimagestorage_derivative_id', 'derivedimagestorage', ['derivative_id'], unique=False) op.create_index('derivedimagestorage_source_id', 'derivedimagestorage', ['source_id'], unique=False) @@ -38,18 +37,21 @@ def upgrade(tables): op.create_index('derivedimagestorage_transformation_id', 'derivedimagestorage', ['transformation_id'], unique=False) op.drop_index('image_repository_id_docker_image_id', table_name='image') op.create_index('image_repository_id_docker_image_id', 'image', ['repository_id', 'docker_image_id'], unique=True) + op.drop_index('imagestorage_uuid', table_name='imagestorage') + op.create_index('imagestorage_uuid', 'imagestorage', ['uuid'], unique=False) + op.drop_column(u'repository', 'namespace') + op.create_index('repository_namespace_user_id', 'repository', ['namespace_user_id'], unique=False) ### end Alembic 
commands ### def downgrade(tables): ### commands auto generated by Alembic - please adjust! ### + op.drop_index('repository_namespace_user_id', table_name='repository') + op.add_column(u'repository', sa.Column('namespace', sa.String(length=255), nullable=True)) + op.drop_index('imagestorage_uuid', table_name='imagestorage') + op.create_index('imagestorage_uuid', 'imagestorage', ['uuid'], unique=True) op.drop_index('image_repository_id_docker_image_id', table_name='image') op.create_index('image_repository_id_docker_image_id', 'image', ['repository_id', 'docker_image_id'], unique=False) - op.drop_index('derivedimagestorage_transformation_id', table_name='derivedimagestorage') - op.drop_index('derivedimagestorage_source_id_transformation_id', table_name='derivedimagestorage') - op.drop_index('derivedimagestorage_source_id', table_name='derivedimagestorage') - op.drop_index('derivedimagestorage_derivative_id', table_name='derivedimagestorage') op.drop_table('derivedimagestorage') - op.drop_index('imagestoragetransformation_name', table_name='imagestoragetransformation') op.drop_table('imagestoragetransformation') ### end Alembic commands ### diff --git a/data/model/legacy.py b/data/model/legacy.py index fae1d694a..e930d82d0 100644 --- a/data/model/legacy.py +++ b/data/model/legacy.py @@ -13,7 +13,7 @@ from data.database import (User, Repository, Image, AccessToken, Role, Repositor Notification, ImageStorageLocation, ImageStoragePlacement, ExternalNotificationEvent, ExternalNotificationMethod, RepositoryNotification, RepositoryAuthorizedEmail, TeamMemberInvite, - random_string_generator, db, BUILD_PHASE) + DerivedImageStorage, random_string_generator, db, BUILD_PHASE) from peewee import JOIN_LEFT_OUTER, fn from util.validation import (validate_username, validate_email, validate_password, INVALID_PASSWORD_MESSAGE) diff --git a/test/data/test.db b/test/data/test.db index 4be5d0b90e533ee3120dc04b47e6b0836a596007..16e215a0584c7226266a8b7f273e33e15eae78f7 100644 GIT binary patch delta 43347 zcmeIb34B!5^*H|CtnbbC61K3Dgs_A#nYYZFHxod}M##P|fGlse5F`s(*kKr4L9E(o zz1k{JcPs9exKu^ks&&PEK?VEmx3#rat=6_~{Lj5F2}#J{DE$Aw|L^lD%Dlt9_nmX^ zd3QVKo_o&D12cCX&%9z%@==PSZh^o3e|~i3T!}t<uj_dOHO@05}iH9oYE-K(b|@_U}#e?)Y-Bj6l`f#n?qgQ?Va#g)!nJKb%onI zTh;EC_O?|m!4V3L&95oUtt(6|t|%yMN=+Z3#Hnb~Q!6VXBbH51ot=IP)%470(r9x^ z5!6+RTi34kp3XpMq5gn%v>NpJPndY_0^~@H?OR4fDPzM z7c(0D`(`?I>PZg3C$B3kDy*5?R}424-Se7h0z2>pQT2=TMTRD>YExT78cRpSGeDV?%Z$$D%qh7*IzSj2|CP8oy0#?r;?4BLK@8e)qhW5W z!#;&Ac9>H_3cX8@l5eADpyz%`+hXsU#?leFPK@BdMo{b&SpW%EwqoeV$3!A z=tLI%>}J}HI&PsE^!m;8%DExNoZ^~6NA;)hcvVkN%aA#O9jJ#{51Fy|&0`q+v2eNG zZ8y=A>H8Gho%%`ZH^KC@vKf9#YT-vd2Yx2Zg`bqs_a+%uk1}$9f`3pqabIy?aG!F2 z}dBqcS`&F+L=3{qvAV#6LR+Z~Uv$4wc z8yK0%{B_AW2VRXmzkWYs{@q*SE${#Sl2~`=K4#pD-`+Lt<8NojCUz}jl#A*O#!uo#%W#sXXJ$SV#8;W>f(>JRz3zR_yQH`T{LB2}#}9rKyKY0db)I?V?KL-CyE{5L8U120 zE63(+T*Zvu^Ulmk=hC{^l^ZuO<1XC)#I%c#-4^3E?PsRA9?zZqPWR`rL!0(7p0`Ku zE_>#KnXz*>FJq=$HOH8F-m9&#M>m&SeRj_imB-3%i%zzp$N$5~vC=K8EGgGL+?|$W zYTsr-tG;C1u@AN^V#cSwcX`@V*L@Y^x9(@s4o-Q)m67>m?2WDa7+vtstM$v09*k|@ zwv3r`%jmp$hwi^Tc5GWYBOU+f*(JXy`%BEd9a$#6cI@bkD_=gb&46}(14!8&i6;kdQ2>K*%;@t;5UT>2-~e~tZn$3DjX!VMjx>vpun_C=ON;49t`)xq!n_3(3D zBm7)F4}Q=ToJe!8z%TU+?iEXzdCn{vuhxI73sTQGnr+WpCH5Ng2~)`MtX`lIcb8+V zZL8&dw#a;kX|!Rp?igLjS?%ksZ&(&EH<(#tyZ$xZJja9fY1Rwb51DFHuW^e0e7fZ? 
z!|Op_%zAbuUVjIBJLc|SR}61wNmW*~==y&b%M9b7pQfisw!%DqMCFiol99w>B1L>q ztin^zva$I3ee8NBGi_UJF8l6FxaB>zKP2|vwfAO*{}}FxZ}`cv)dYSnQJRL=9A#%n zXTEYmbVrok6JPH?JdH2B20@qfL)&>HZkFS!*Vtxe${gpix*rlg!IzG(Wek6$>igo0 z)7InbkAUZ?4mMY)-&@jwM_vWbv;2K^dX>r{^(0jJBe?5Twmn#W;UD|PhCY3tC%$qt zvV@b=`0y*Nn=#6^Pq#gAb_i=;VVk9szxeXHx{3?oHRXX=4tj+hFH?6vd}`XZM4czH z%zu~~_Z?a^~EyYs*iSA?qpL?E7#|1C53GCvB)uNqW$!e_rSpM=QaQXNpq4Tu6 zmUrQZmmr6PJiB&M6x()juuwQ)ZNzUKWT(m$2k(jtF3xp$5(@Ch0I1D42x=pLysJB9 z6Soyt9$>dJCii3K%2prv8~*%77-3pd&ApR%<$PF^>ksCmFS22>#23y+@gtJep2!^k zVQZ{=k!_KV*SvTl_RLxPV7C9XHGcdB$dP=7i7cM9dyXezS|GL+F9^@8?%5W@LzAm; z=JSvwjnijmmQL89Y@OndY2tZ^=}EKl%{3VTmA!r}n zPDe;ykqk*PsrM-_RYVFM)dM76ktMCdld`0x_*09t5^uA>U4?K*2zR^9aMx~zyD+$8 zdi-(1Av_W$%_d}-J-BUzt-=#VAlMGQ%U0sW@4}s0xI1?S?$(}RE16k8GwlqQX3J%n zv&v`sAz=s~jgV&JDQ8#)KX4kzn@$6xLb!YO6x?;6Vr@9$6vW}7lMsjVg-gmw2=3GG zuw{7FJ8+l#4v?HY!M5Z66Kop}J;BKuy_Ww@7Z8-}CSKGbxWP#bE9qfmKtH-O8 zq&YI_3yR0MF~Ji(3G<-f^7ulcbOxg>-=AJUpPGVW-(#m@TcWg8I`y7olN%mS_^h=T zs;(Y-aA~4+;*xjela|a1s_B8MtH&!7q~jUI^DCDJ=dHVl?O%WkPn4vTWxX|f>tL5q zb@h1s7i^A9b71|J{Gwxho`g<_S*VSlvon~Cg}Qrkf`T8z8$JVviX+kL%xwmH>s+Y1 zdOY(pHjzm?_0{l{_82W5xd=&N+~Nmj#|%Ds31?pfJqB_{dx_>9P^1#XvZDN5?}o51^nlaAsJ-c9{O%hYV=*6 z1ZN;Bbss|kOm+Y3&z7%O7kT0x{(}3bkD%aYzT5X{9yz7B)gCCgDIYasjCWnb6fE{=b^C6 z?>^izX4jriJ@MBC3hk`(5Ta8vs;4w4?yJMc-iMS*&p4YVZP`8<55CWqaam7`&Qx2F zzr9bSVAnlJYJ(^$qa 0: + size_info.size += len(decompressor.decompress(current_data, CHUNK_SIZE)) + current_data = decompressor.unconsumed_tail return size_info, fn diff --git a/tools/uncompressedsize.py b/util/uncompressedsize.py similarity index 84% rename from tools/uncompressedsize.py rename to util/uncompressedsize.py index 53bb74126..7cfa86e2a 100644 --- a/tools/uncompressedsize.py +++ b/util/uncompressedsize.py @@ -1,5 +1,6 @@ import logging import zlib +import sys from data import model from data.database import ImageStorage @@ -15,6 +16,15 @@ CHUNK_SIZE = 5 * 1024 * 1024 def backfill_sizes_from_data(): + logger.setLevel(logging.DEBUG) + logger.debug('Starting uncompressed image size backfill') + + formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + + ch = logging.StreamHandler(sys.stdout) + ch.setFormatter(formatter) + logger.addHandler(ch) + while True: # Load the record from the DB. batch_ids = list(ImageStorage @@ -47,7 +57,9 @@ def backfill_sizes_from_data(): if len(current_data) == 0: break - uncompressed_size += len(decompressor.decompress(current_data)) + while current_data: + uncompressed_size += len(decompressor.decompress(current_data, CHUNK_SIZE)) + current_data = decompressor.unconsumed_tail # Write the size to the image storage. We do so under a transaction AFTER checking to # make sure the image storage still exists and has not changed.