Add perf comments

Joseph Schorr 2014-11-05 12:27:38 -05:00
parent 408fd229a7
commit a35bc11912
4 changed files with 20 additions and 0 deletions

@@ -1148,6 +1148,8 @@ def __translate_ancestry(old_ancestry, translations, repository, username, prefe
return translations[old_id]
# TODO: PERFORMANCE IMPROVEMENT: Select all the ancestor Docker IDs in a single query; the
# values are currently retrieved one at a time by the translate call above.
old_ids = [int(id_str) for id_str in old_ancestry.split('/')[1:-1]]
new_ids = [str(translate_id(old_id)) for old_id in old_ids]
return '/%s/' % '/'.join(new_ids)
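
A minimal sketch of the batched lookup this TODO describes, assuming a peewee Image model with id and docker_image_id columns (the model and function names here are illustrative, not taken from the diff):

    def prefetch_ancestor_docker_ids(old_ids):
        # One IN-query for all ancestor rows, instead of the one-query-per-
        # ancestor pattern inside translate_id; the results can seed the
        # translations cache that the surrounding code already consults.
        return {image.id: image.docker_image_id
                for image in Image.select().where(Image.id << old_ids)}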
@@ -1164,6 +1166,9 @@ def _create_storage(location_name):
def find_create_or_link_image(docker_image_id, repository, username, translations,
preferred_location):
with config.app_config['DB_TRANSACTION_FACTORY'](db):
# TODO: PERF IMPROVEMENT: Also make this look up the image directly, rather than joining
# image and image storage, etc. The common case doesn't need that information
# (but other callers might, so double-check).
repo_image = get_repo_image(repository.namespace_user.username, repository.name,
docker_image_id)
if repo_image:
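
One possible shape for the join-free lookup (a sketch only; it assumes a peewee Image model with repository and docker_image_id fields, which may not match the real schema):

    def lookup_image_only(repository, docker_image_id):
        # Select just the image row, skipping the ImageStorage/location joins
        # that get_repo_image performs; this path doesn't need them.
        try:
            return (Image
                    .select()
                    .where(Image.repository == repository,
                           Image.docker_image_id == docker_image_id)
                    .get())
        except Image.DoesNotExist:
            return None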

@@ -228,6 +228,8 @@ def create_repository(namespace, repository):
for desc in image_descriptions])
new_repo_images = dict(added_images)
# TODO: PERF IMPROVEMENT: This doesn't need the locations OR the imagestorage, so just
# select the images directly. Also use a set here.
for existing in model.get_repository_images(namespace, repository):
if existing.docker_image_id in new_repo_images:
added_images.pop(existing.docker_image_id)
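
The set-based version of this loop might look like the following sketch; it assumes added_images is a dict keyed by Docker image ID, as the surrounding code suggests:

    # Collect the existing IDs once, then drop the overlap with O(1) lookups.
    existing_ids = {image.docker_image_id
                    for image in model.get_repository_images(namespace, repository)}
    for image_id in list(added_images):
        if image_id in existing_ids:
            added_images.pop(image_id)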

@@ -151,6 +151,9 @@ def get_image_layer(namespace, repository, image_id, headers):
return resp
profile.debug('Streaming layer data')
# TODO: DATABASE: We should disconnect from the database here, so that
# we're not holding the DB handle during this long download.
return Response(store.stream_read(repo_image.storage.locations, path), headers=headers)
except (IOError, AttributeError):
profile.debug('Image not found')
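
One way to realize this download-side TODO, assuming db is the module-level peewee database object with its standard close()/is_closed() methods:

    def stream_with_db_released(db, chunk_iter):
        # The generator body runs during response streaming, after the view
        # has returned, so the handle is freed before the first chunk goes out.
        if not db.is_closed():
            db.close()
        for chunk in chunk_iter:
            yield chunk

The view would then wrap the storage stream, e.g. Response(stream_with_db_released(db, store.stream_read(...)), headers=headers).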
@@ -212,6 +215,9 @@ def put_image_layer(namespace, repository, image_id):
h, sum_hndlr = checksums.simple_checksum_handler(json_data)
sr.add_handler(sum_hndlr)
# TODO: DATABASE: We should disconnect from the database here and reconnect AFTER, so that
# we're not holding the DB handle during this long upload.
# Stream write the data to storage.
store.stream_write(repo_image.storage.locations, layer_path, sr)
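
The upload-side TODO could be bracketed explicitly (a sketch; db is again the assumed peewee database object, and repo_image.storage.locations is already loaded, so no lazy query fires inside the block):

    db.close()
    try:
        # Handle released for the duration of the client upload; nothing in
        # this block may touch the database.
        store.stream_write(repo_image.storage.locations, layer_path, sr)
    finally:
        db.connect()  # re-establish before the checksum queries that follow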

@@ -24,6 +24,8 @@ logger = logging.getLogger(__name__)
def _open_stream(namespace, repository, tag, synthetic_image_id, image_json, image_list):
store = Storage(app)
# TODO: PERFORMANCE: configure DB, load the images, cache them, then disconnect.
def get_next_image():
for current_image_id in image_list:
yield model.get_repo_image(namespace, repository, current_image_id)
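
A sketch of the cache-then-disconnect idea for this generator (db is the assumed shared peewee handle; the other names match the surrounding code):

    def get_next_image():
        # Load and cache every image row up front, then release the DB
        # handle so it isn't held while the synthetic image streams.
        cached = [model.get_repo_image(namespace, repository, image_id)
                  for image_id in image_list]
        db.close()
        for image in cached:
            yield image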
@@ -55,6 +57,8 @@ def _write_synthetic_image_to_storage(linked_storage_uuid, linked_locations, que
queue_file.add_exception_handler(handle_exception)
# TODO: PERFORMANCE: disconnect from the DB and reconnect once the stream write finishes or on
# error.
image_path = store.image_layer_path(linked_storage_uuid)
store.stream_write(linked_locations, image_path, queue_file)
queue_file.close()
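
Since the same disconnect/reconnect bracket recurs across these TODOs, a small helper could capture it once (hypothetical, again assuming peewee's close()/connect()):

    from contextlib import contextmanager

    @contextmanager
    def db_released(db):
        # Release the handle around a long blocking operation and reconnect
        # afterwards, even if the operation raises.
        db.close()
        try:
            yield
        finally:
            db.connect()

Usage here would be: with db_released(db): store.stream_write(linked_locations, image_path, queue_file).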
@@ -132,6 +136,9 @@ def get_squashed_tag(namespace, repository, tag):
storage_args = (derived.uuid, derived.locations, storage_queue_file)
QueueProcess.run_process(_write_synthetic_image_to_storage, storage_args, finished=_cleanup)
# TODO: PERFORMANCE: close the database handle here for this process before we begin
# sending the long download.
# Return the client's data.
return send_file(client_queue_file)
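
For this final TODO, the handle only needs closing before the view returns, since send_file streams the queue file after the view exits (sketch; assumes no further queries run during this request):

    if not db.is_closed():
        db.close()  # nothing below touches the database
    return send_file(client_queue_file)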