Database optimizations around image creation and logs lookup

2014-11-06 14:48:16 -05:00 · 2014-11-06 14:48:16 -05:00 · c569299e5c
commit c569299e5c
parent a35bc11912
2 changed files with 128 additions and 69 deletions
--- a/endpoints/index.py
+++ b/endpoints/index.py
@@ -223,15 +223,20 @@ def create_repository(namespace, repository):
    repo = model.create_repository(namespace, repository,
                                   get_authenticated_user())

-  profile.debug('Determining added images')
-  added_images = OrderedDict([(desc['id'], desc)
-                              for desc in image_descriptions])
+  profile.debug('Determining already added images')
+  added_images = OrderedDict([(desc['id'], desc) for desc in image_descriptions])
  new_repo_images = dict(added_images)

-  # TODO PERF IMPROVEMENT: Doesn't need the locations OR the imagestorage, so just select the images
-  # directly. Also use a set here.
-  for existing in model.get_repository_images(namespace, repository):
-    if existing.docker_image_id in new_repo_images:
+  # Optimization: Lookup any existing images in the repository with matching docker IDs and
+  # remove them from the added dict, so we don't need to look them up one-by-one.
+  def chunks(l, n):
+    for i in xrange(0, len(l), n):
+      yield l[i:i+n]
+
+  # Note: We do this in chunks in an effort to not hit the SQL query size limit.
+  for chunk in chunks(new_repo_images.keys(), 50):
+    existing_images = model.lookup_repository_images(namespace, repository, chunk)
+    for existing in existing_images:
      added_images.pop(existing.docker_image_id)

  profile.debug('Creating/Linking necessary images')
@@ -243,9 +248,8 @@ def create_repository(namespace, repository):


  profile.debug('Created images')
-  response = make_response('Created', 201)
  track_and_log('push_repo', repo)
-  return response
+  return make_response('Created', 201)


@index.route('/repositories/<path:repository>/images', methods=['PUT'])