diff --git a/data/model/legacy.py b/data/model/legacy.py
index 34be41491..1fb719b00 100644
--- a/data/model/legacy.py
+++ b/data/model/legacy.py
@@ -1148,6 +1148,8 @@ def __translate_ancestry(old_ancestry, translations, repository, username, prefe
     return translations[old_id]
 
+  # TODO: PERFORMANCE IMPROVEMENT: Select all the ancestor Docker IDs in a single query. The values
+  # are retrieved in the above translate call.
   old_ids = [int(id_str) for id_str in old_ancestry.split('/')[1:-1]]
   new_ids = [str(translate_id(old_id)) for old_id in old_ids]
   return '/%s/' % '/'.join(new_ids)
 
@@ -1164,6 +1166,9 @@ def _create_storage(location_name):
 def find_create_or_link_image(docker_image_id, repository, username, translations,
                               preferred_location):
   with config.app_config['DB_TRANSACTION_FACTORY'](db):
+    # TODO: PERF IMPROVEMENT: Also make this only look up the image directly, rather than
+    # joining image and image storage, etc. The common case doesn't need that information
+    # (but other callers might, so double check).
     repo_image = get_repo_image(repository.namespace_user.username, repository.name,
                                 docker_image_id)
     if repo_image:
diff --git a/endpoints/index.py b/endpoints/index.py
index 1c8e551a0..7394671af 100644
--- a/endpoints/index.py
+++ b/endpoints/index.py
@@ -228,6 +228,8 @@ def create_repository(namespace, repository):
                               for desc in image_descriptions])
   new_repo_images = dict(added_images)
 
+  # TODO: PERF IMPROVEMENT: This doesn't need the locations OR the ImageStorage, so just select
+  # the images directly. Also use a set here.
   for existing in model.get_repository_images(namespace, repository):
     if existing.docker_image_id in new_repo_images:
       added_images.pop(existing.docker_image_id)
diff --git a/endpoints/registry.py b/endpoints/registry.py
index 48943bf4c..85b4b7963 100644
--- a/endpoints/registry.py
+++ b/endpoints/registry.py
@@ -151,6 +151,9 @@ def get_image_layer(namespace, repository, image_id, headers):
       return resp
 
     profile.debug('Streaming layer data')
+
+    # TODO: DATABASE: We should disconnect from the database here, so that
+    # we're not holding the DB handle during this long download.
     return Response(store.stream_read(repo_image.storage.locations, path), headers=headers)
   except (IOError, AttributeError):
     profile.debug('Image not found')
@@ -212,6 +215,9 @@ def put_image_layer(namespace, repository, image_id):
 
   h, sum_hndlr = checksums.simple_checksum_handler(json_data)
   sr.add_handler(sum_hndlr)
+  # TODO: DATABASE: We should disconnect from the database here and reconnect AFTER, so that
+  # we're not holding the DB handle during this long upload.
 
   # Stream write the data to storage.
   store.stream_write(repo_image.storage.locations, layer_path, sr)
diff --git a/endpoints/verbs.py b/endpoints/verbs.py
index b410251d3..fa3bbe574 100644
--- a/endpoints/verbs.py
+++ b/endpoints/verbs.py
@@ -24,6 +24,8 @@ logger = logging.getLogger(__name__)
 def _open_stream(namespace, repository, tag, synthetic_image_id, image_json, image_list):
   store = Storage(app)
 
+  # TODO: PERFORMANCE: Configure the DB, load the images, cache them, then disconnect.
+
   def get_next_image():
     for current_image_id in image_list:
       yield model.get_repo_image(namespace, repository, current_image_id)
@@ -55,6 +57,8 @@ def _write_synthetic_image_to_storage(linked_storage_uuid, linked_locations, que
 
   queue_file.add_exception_handler(handle_exception)
 
+  # TODO: PERFORMANCE: Disconnect from the DB and reconnect once the stream write finishes or on
+  # error.
   image_path = store.image_layer_path(linked_storage_uuid)
   store.stream_write(linked_locations, image_path, queue_file)
   queue_file.close()
@@ -132,6 +136,9 @@ def get_squashed_tag(namespace, repository, tag):
 
   storage_args = (derived.uuid, derived.locations, storage_queue_file)
   QueueProcess.run_process(_write_synthetic_image_to_storage, storage_args, finished=_cleanup)
+  # TODO: PERFORMANCE: Close the database handle here for this process before we start the long
+  # download.
 
   # Return the client's data.
   return send_file(client_queue_file)
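
Reviewer notes — the sketches below are illustrations only, not part of the patch.

The __translate_ancestry TODO asks for the per-ancestor point lookups to collapse into a single
query. A minimal sketch of that batching, assuming the peewee Image model from data.database and
peewee's `<<` (IN) operator; load_ancestor_docker_ids is a hypothetical helper name:

  from data.database import Image

  def load_ancestor_docker_ids(old_ids):
    # One IN query for all ancestors, instead of one Image .get() per ancestor.
    query = Image.select(Image.id, Image.docker_image_id).where(Image.id << old_ids)
    return {image.id: image.docker_image_id for image in query}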
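For the lookup TODOs in find_create_or_link_image and create_repository, the common case needs
only the Image row, not the ImageStorage or locations joins that get_repo_image performs. A
hypothetical narrow query, assuming the peewee Image, Repository, and User models from
data.database (lookup_image_only is an illustrative name, not an existing helper):

  from data.database import Image, Repository, User

  def lookup_image_only(namespace_name, repo_name, docker_image_id):
    # Fetch only the Image row; no ImageStorage or location joins.
    try:
      return (Image
              .select(Image)
              .join(Repository)
              .join(User, on=(Repository.namespace_user == User.id))
              .where(User.username == namespace_name,
                     Repository.name == repo_name,
                     Image.docker_image_id == docker_image_id)
              .get())
    except Image.DoesNotExist:
      return None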
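The DATABASE TODOs in endpoints/registry.py and endpoints/verbs.py all describe the same pattern:
release the DB handle across a long streaming read or write. One possible shape, assuming the
module-level peewee db handle can be safely closed and reopened mid-request (released_db_handle is
a hypothetical helper):

  from contextlib import contextmanager

  from data.database import db

  @contextmanager
  def released_db_handle():
    # Drop the connection so it isn't held for the duration of the transfer.
    db.close()
    try:
      yield
    finally:
      # Reconnect so any follow-up queries in the request still work.
      db.connect()

For example, the stream write in put_image_layer would become:

  with released_db_handle():
    store.stream_write(repo_image.storage.locations, layer_path, sr)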