Add perf comments

Joseph Schorr 2014-11-05 12:27:38 -05:00
parent 408fd229a7
commit a35bc11912
4 changed files with 20 additions and 0 deletions

@@ -1148,6 +1148,8 @@ def __translate_ancestry(old_ancestry, translations, repository, username, prefe
return translations[old_id]
# TODO: PERFORMANCE IMPROVEMENT: Select all the ancestor Docker IDs in a single query; the
# values are currently retrieved one at a time by the translate call above.
old_ids = [int(id_str) for id_str in old_ancestry.split('/')[1:-1]]
new_ids = [str(translate_id(old_id)) for old_id in old_ids]
return '/%s/' % '/'.join(new_ids)
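
A minimal sketch of the batched lookup this TODO describes, assuming a peewee Image model with id and docker_image_id columns (the model and function names here are illustrative, not taken from the diff):

    def prefetch_ancestor_docker_ids(old_ids):
        # One IN-query for all ancestor rows, instead of the one-query-per-
        # ancestor pattern inside translate_id; the results can seed the
        # translations cache that the surrounding code already consults.
        return {image.id: image.docker_image_id
                for image in Image.select().where(Image.id << old_ids)}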
@@ -1164,6 +1166,9 @@ def _create_storage(location_name):
def find_create_or_link_image(docker_image_id, repository, username, translations,
preferred_location):
with config.app_config['DB_TRANSACTION_FACTORY'](db):
# TODO: PERF IMPROVEMENT: Also make this look up the image directly, rather than joining
# image and image storage, etc. The common case doesn't need that information
# (but other callers might, so double-check).
repo_image = get_repo_image(repository.namespace_user.username, repository.name,
docker_image_id)
if repo_image:
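
One possible shape for the join-free lookup (a sketch only; it assumes a peewee Image model with repository and docker_image_id fields, which may not match the real schema):

    def lookup_image_only(repository, docker_image_id):
        # Select just the image row, skipping the ImageStorage/location joins
        # that get_repo_image performs; this path doesn't need them.
        try:
            return (Image
                    .select()
                    .where(Image.repository == repository,
                           Image.docker_image_id == docker_image_id)
                    .get())
        except Image.DoesNotExist:
            return None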

@@ -228,6 +228,8 @@ def create_repository(namespace, repository):
for desc in image_descriptions])
new_repo_images = dict(added_images)
# TODO: PERF IMPROVEMENT: This doesn't need the locations OR the imagestorage, so just
# select the images directly. Also use a set here.
for existing in model.get_repository_images(namespace, repository):
if existing.docker_image_id in new_repo_images:
added_images.pop(existing.docker_image_id)
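
The set-based version of this loop might look like the following sketch; it assumes added_images is a dict keyed by Docker image ID, as the surrounding code suggests:

    # Collect the existing IDs once, then drop the overlap with O(1) lookups.
    existing_ids = {image.docker_image_id
                    for image in model.get_repository_images(namespace, repository)}
    for image_id in list(added_images):
        if image_id in existing_ids:
            added_images.pop(image_id)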

@@ -151,6 +151,9 @@ def get_image_layer(namespace, repository, image_id, headers):
return resp
profile.debug('Streaming layer data')
# TODO: DATABASE: We should disconnect from the database here, so that
# we're not holding the DB handle during this long download.
return Response(store.stream_read(repo_image.storage.locations, path), headers=headers)
except (IOError, AttributeError):
profile.debug('Image not found')
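
One way to realize this download-side TODO, assuming db is the module-level peewee database object with its standard close()/is_closed() methods:

    def stream_with_db_released(db, chunk_iter):
        # The generator body runs during response streaming, after the view
        # has returned, so the handle is freed before the first chunk goes out.
        if not db.is_closed():
            db.close()
        for chunk in chunk_iter:
            yield chunk

The view would then wrap the storage stream, e.g. Response(stream_with_db_released(db, store.stream_read(...)), headers=headers).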
@@ -212,6 +215,9 @@ def put_image_layer(namespace, repository, image_id):
h, sum_hndlr = checksums.simple_checksum_handler(json_data)
sr.add_handler(sum_hndlr)
# TODO: DATABASE: We should disconnect from the database here and reconnect AFTER, so that
# we're not holding the DB handle during this long upload.
# Stream write the data to storage.
store.stream_write(repo_image.storage.locations, layer_path, sr)
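
The upload-side TODO could be bracketed explicitly (a sketch; db is again the assumed peewee database object, and repo_image.storage.locations is already loaded, so no lazy query fires inside the block):

    db.close()
    try:
        # Handle released for the duration of the client upload; nothing in
        # this block may touch the database.
        store.stream_write(repo_image.storage.locations, layer_path, sr)
    finally:
        db.connect()  # re-establish before the checksum queries that follow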

@@ -24,6 +24,8 @@ logger = logging.getLogger(__name__)
def _open_stream(namespace, repository, tag, synthetic_image_id, image_json, image_list):
store = Storage(app)
# TODO: PERFORMANCE: configure DB, load the images, cache them, then disconnect.
def get_next_image():
for current_image_id in image_list:
yield model.get_repo_image(namespace, repository, current_image_id)
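
A sketch of the cache-then-disconnect idea for this generator (db is the assumed shared peewee handle; the other names match the surrounding code):

    def get_next_image():
        # Load and cache every image row up front, then release the DB
        # handle so it isn't held while the synthetic image streams.
        cached = [model.get_repo_image(namespace, repository, image_id)
                  for image_id in image_list]
        db.close()
        for image in cached:
            yield image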
@@ -55,6 +57,8 @@ def _write_synthetic_image_to_storage(linked_storage_uuid, linked_locations, que
queue_file.add_exception_handler(handle_exception)
# TODO: PERFORMANCE: disconnect from the DB and reconnect once the stream write finishes or on
# error.
image_path = store.image_layer_path(linked_storage_uuid)
store.stream_write(linked_locations, image_path, queue_file)
queue_file.close()
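
Since the same disconnect/reconnect bracket recurs across these TODOs, a small helper could capture it once (hypothetical, again assuming peewee's close()/connect()):

    from contextlib import contextmanager

    @contextmanager
    def db_released(db):
        # Release the handle around a long blocking operation and reconnect
        # afterwards, even if the operation raises.
        db.close()
        try:
            yield
        finally:
            db.connect()

Usage here would be: with db_released(db): store.stream_write(linked_locations, image_path, queue_file).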
@@ -132,6 +136,9 @@ def get_squashed_tag(namespace, repository, tag):
storage_args = (derived.uuid, derived.locations, storage_queue_file)
QueueProcess.run_process(_write_synthetic_image_to_storage, storage_args, finished=_cleanup)
# TODO: PERFORMANCE: close the database handle here for this process before we begin
# sending the long download.
# Return the client's data.
return send_file(client_queue_file)
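
For this final TODO, the handle only needs closing before the view returns, since send_file streams the queue file after the view exits (sketch; assumes no further queries run during this request):

    if not db.is_closed():
        db.close()  # nothing below touches the database
    return send_file(client_queue_file)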