Load images and storage references in bulk during V1 synthesize

Currently, we perform multiple queries for each layer, making it much slower (especially cross-region) Fixes #413
2015-09-29 17:53:39 -04:00 · 2015-09-29 17:53:39 -04:00 · 35c35d9913
commit 35c35d9913
parent 78e8aefd45
4 changed files with 173 additions and 157 deletions
--- a/data/model/image.py
+++ b/data/model/image.py
@ -105,13 +105,10 @@ def _translate_placements_to_images_with_locations(query):
  return images.values()


-def lookup_repository_images(namespace_name, repository_name, docker_image_ids):
+def lookup_repository_images(repo, docker_image_ids):
  return (Image
          .select()
-          .join(Repository)
-          .join(Namespace, on=(Repository.namespace_user == Namespace.id))
-          .where(Repository.name == repository_name, Namespace.username == namespace_name,
-                 Image.docker_image_id << docker_image_ids))
+          .where(Image.repository == repo, Image.docker_image_id << docker_image_ids))


 def get_matching_repository_images(namespace_name, repository_name, docker_image_ids):
@ -415,31 +412,14 @@ def get_image_layers(image):
  return image_list


-def synthesize_v1_image(namespace, repository_name, storage_checksum, docker_image_id,
-                        created_date_str, comment, command, v1_json_metadata, parent_docker_id):
+def synthesize_v1_image(repo, image_storage, docker_image_id, created_date_str,
+                        comment, command, v1_json_metadata, parent_image=None):
  """ Find an existing image with this docker image id, and if none exists, write one with the
      specified metadata.
  """
-
-  repo = _basequery.get_existing_repository(namespace, repository_name)
-  # Sometimes the manifest may reference an image that already exists
-
-  found = get_image(repo, docker_image_id)
-  if found is not None:
-    # The image already exists, nothing to do
-    return found
-
-  the_bits = storage.get_repo_storage_by_checksum(namespace, repository_name, storage_checksum)
-
  ancestors = '/'
-  if parent_docker_id is not None:
-    parent = get_repo_image(namespace, repository_name, parent_docker_id)
-    if parent is None:
-      msg = 'Parent not found with docker image id {0} in repo {1}/{2}'.format(parent_docker_id,
-                                                                               namespace,
-                                                                               repository_name)
-      raise InvalidImageException(msg)
-    ancestors = '{0}{1}/'.format(parent.ancestors, parent.id)
+  if parent_image is not None:
+    ancestors = '{0}{1}/'.format(parent_image.ancestors, parent_image.id)

  created = None
  if created_date_str is not None:
@ -451,4 +431,4 @@ def synthesize_v1_image(namespace, repository_name, storage_checksum, docker_ima

  return Image.create(docker_image_id=docker_image_id, ancestors=ancestors, comment=comment,
                      command=command, v1_json_metadata=v1_json_metadata, created=created,
-                      storage=the_bits, repository=repo)
+                      storage=image_storage, repository=repo)
--- a/data/model/storage.py
+++ b/data/model/storage.py
@ -232,5 +232,10 @@ def get_layer_path(storage_record):

  return store.blob_path(storage_record.checksum)

-
-
+def lookup_repo_storages_by_checksum(repo, checksums):
+  """ Looks up repository storages (without placements) matching the given repository
+      and checksum. """
+  return (ImageStorage
+          .select()
+          .join(Image)
+          .where(Image.repository == repo, ImageStorage.checksum << checksums))
--- a/endpoints/v2/manifest.py
+++ b/endpoints/v2/manifest.py
@ -98,7 +98,7 @@ class SignedManifest(object):
  @property
  def layers(self):
    """ Returns a generator of objects that have the blobSum and v1Compatibility keys in them,
-        starting from the root image and working toward the leaf node.
+        starting from the leaf image and working toward the base node.
    """
    for blob_sum_obj, history_obj in reversed(zip(self._parsed[_FS_LAYERS_KEY],
                                                  self._parsed[_HISTORY_KEY])):
@ -258,36 +258,73 @@ def write_manifest_by_digest(namespace, repo_name, manifest_ref):


 def _write_manifest(namespace, repo_name, manifest):
+  # Ensure that the manifest is for this repository.
  if manifest.namespace != namespace or manifest.repo_name != repo_name:
    raise NameInvalid()

+  # Ensure that the repository exists.
+  repo = model.repository.get_repository(namespace, repo_name)
+  if repo is None:
+    raise NameInvalid()
+
+  # Lookup all the images and their parent images (if any) inside the manifest. This will let us
+  # know which V1 images we need to synthesize and which ones are invalid.
+  layers = list(manifest.layers)
+
+  docker_image_ids = [mdata.v1_metadata.docker_id for mdata in layers]
+  parent_image_ids = [mdata.v1_metadata.parent for mdata in layers
+                      if mdata.v1_metadata.parent]
+  all_image_ids = list(set(docker_image_ids + parent_image_ids))
+
+  images_query = model.image.lookup_repository_images(repo, all_image_ids)
+  images_map = {image.docker_image_id: image for image in images_query}
+
+  # Lookup the storages associated with each blob in the manifest.
+  checksums = [str(mdata.digest) for mdata in manifest.layers]
+  storage_query = model.storage.lookup_repo_storages_by_checksum(repo, checksums)
+  storage_map = {storage.checksum: storage for storage in storage_query}
+
+  # Synthesize the V1 metadata for each layer.
  manifest_digest = manifest.digest
  tag_name = manifest.tag

-  leaf_layer = None
-  try:
-    for mdata in manifest.layers:
-      # Store the v1 metadata in the db
-      v1_mdata = mdata.v1_metadata
+  for mdata in layers:
    digest_str = str(mdata.digest)
-      model.image.synthesize_v1_image(namespace, repo_name, digest_str, v1_mdata.docker_id,
-                                      v1_mdata.created, v1_mdata.comment, v1_mdata.command,
-                                      mdata.v1_metadata_str, v1_mdata.parent)
-      leaf_layer = mdata
+    v1_mdata = mdata.v1_metadata

-  except model.InvalidImageException:
+    # If there is already a V1 image for this layer, nothing more to do.
+    if v1_mdata.docker_id in images_map:
+      continue
+
+    # Lookup the parent image for the layer, if any.
+    parent_image = None
+    if v1_mdata.parent is not None:
+      parent_image = images_map.get(v1_mdata.parent)
+      if parent_image is None:
+        msg = 'Parent not found with docker image id {0}'.format(v1_mdata.parent)
+        raise ManifestInvalid(detail={'message': msg})
+
+    # Synthesize and store the v1 metadata in the db.
+    blob_storage = storage_map.get(digest_str)
+    if blob_storage is None:
      raise BlobUnknown(detail={'digest': digest_str})

-  if leaf_layer is None:
+    image = model.image.synthesize_v1_image(repo, blob_storage, v1_mdata.docker_id,
+                                            v1_mdata.created, v1_mdata.comment, v1_mdata.command,
+                                            mdata.v1_metadata_str, parent_image)
+
+    images_map[v1_mdata.docker_id] = image
+
+  if not layers:
    # The manifest doesn't actually reference any layers!
    raise ManifestInvalid(detail={'message': 'manifest does not reference any layers'})

+  # Store the manifest pointing to the tag.
+  leaf_layer = layers[0]
  model.tag.store_tag_manifest(namespace, repo_name, tag_name, leaf_layer.v1_metadata.docker_id,
                               manifest_digest, request.data)

  # Spawn the repo_push event.
-  repo = model.repository.get_repository(namespace, repo_name)
-  if repo is not None:
  event_data = {
    'updated_tags': [tag_name],
  }
--- a/test/registry_tests.py
+++ b/test/registry_tests.py
@ -205,6 +205,9 @@ class BaseRegistryMixin(object):
    self.assertEquals(response.status_code, expected_code)
    return response

+  def _get_default_images(self):
+    return [{'id': 'someid', 'contents': 'somecontent'}]
+

 class V1RegistryMixin(BaseRegistryMixin):
  def v1_ping(self):
@ -212,20 +215,21 @@ class V1RegistryMixin(BaseRegistryMixin):


 class V1RegistryPushMixin(V1RegistryMixin):
-  def do_push(self, namespace, repository, username, password, images):
+  def do_push(self, namespace, repository, username, password, images=None):
+    images = images or self._get_default_images()
    auth = (username, password)

    # Ping!
    self.v1_ping()

    # PUT /v1/repositories/{namespace}/{repository}/
-    data = [{"id": image_id} for image_id, _ in images.iteritems()]
    self.conduct('PUT', '/v1/repositories/%s/%s' % (namespace, repository),
-                 data=json.dumps(data), auth=auth,
+                 data=json.dumps(images), auth=auth,
                 expected_code=201)

    last_image_id = None
-    for image_id, _ in images.iteritems():
+    for image_data in images:
+      image_id = image_data['id']
      last_image_id = image_id

      # PUT /v1/images/{imageID}/json
@ -364,8 +368,10 @@ class V2RegistryMixin(BaseRegistryMixin):


 class V2RegistryPushMixin(V2RegistryMixin):
-  def do_push(self, namespace, repository, username, password, images, tag_name=None,
-              cancel=False, invalid=False):
+  def do_push(self, namespace, repository, username, password, images=None, tag_name=None,
+              cancel=False, invalid=False, expected_manifest_code=202):
+    images = images or self._get_default_images()
+
    # Ping!
    self.v2_ping()

@ -375,30 +381,22 @@ class V2RegistryPushMixin(V2RegistryMixin):
    # Build a fake manifest.
    tag_name = tag_name or 'latest'
    builder = SignedManifestBuilder(namespace, repository, tag_name)
-    for image_id, contents in images.iteritems():
-      if isinstance(contents, dict):
-        full_contents = contents['contents']
-      else:
-        full_contents = contents
-
-      checksum = 'sha256:' + hashlib.sha256(full_contents).hexdigest()
+    for image_data in images:
+      checksum = 'sha256:' + hashlib.sha256(image_data['contents']).hexdigest()
      if invalid:
        checksum = 'sha256:' + hashlib.sha256('foobarbaz').hexdigest()

-      builder.add_layer(checksum, json.dumps({'id': image_id, 'data': contents}))
+      builder.add_layer(checksum, json.dumps(image_data))

    # Build the manifest.
    manifest = builder.build(_JWK)

    # Push the image's layers.
    checksums = {}
-    for image_id, contents in images.iteritems():
-      chunks = None
-      if isinstance(contents, dict):
-        full_contents = contents['contents']
-        chunks = contents['chunks']
-      else:
-        full_contents = contents
+    for image_data in images:
+      image_id = image_data['id']
+      full_contents = image_data['contents']
+      chunks = image_data.get('chunks')

      # Layer data should not yet exist.
      checksum = 'sha256:' + hashlib.sha256(full_contents).hexdigest()
@ -414,7 +412,7 @@ class V2RegistryPushMixin(V2RegistryMixin):

      # PATCH the image data into the layer.
      if chunks is None:
-        self.conduct('PATCH', location, data=contents, expected_code=204, auth='jwt')
+        self.conduct('PATCH', location, data=full_contents, expected_code=204, auth='jwt')
      else:
        for chunk in chunks:
          if len(chunk) == 3:
@ -461,7 +459,7 @@ class V2RegistryPushMixin(V2RegistryMixin):
      self.assertEquals(response.headers['Content-Length'], str(len(full_contents)))

    # Write the manifest.
-    put_code = 404 if invalid else 202
+    put_code = 404 if invalid else expected_manifest_code
    self.conduct('PUT', '/v2/%s/%s/manifests/%s' % (namespace, repository, tag_name),
                 data=manifest.bytes, expected_code=put_code,
                 headers={'Content-Type': 'application/json'}, auth='jwt')
@ -508,11 +506,7 @@ class V2RegistryPullMixin(V2RegistryMixin):
 class RegistryTestsMixin(object):
  def test_pull_publicrepo_anonymous(self):
    # Add a new repository under the public user, so we have a real repository to pull.
-    images = {
-      'someid': 'onlyimagehere'
-    }
-
-    self.do_push('public', 'newrepo', 'public', 'password', images)
+    self.do_push('public', 'newrepo', 'public', 'password')
    self.clearSession()

    # First try to pull the (currently private) repo anonymously, which should fail (since it is
@ -530,11 +524,7 @@ class RegistryTestsMixin(object):

  def test_pull_publicrepo_devtable(self):
    # Add a new repository under the public user, so we have a real repository to pull.
-    images = {
-      'someid': 'onlyimagehere'
-    }
-
-    self.do_push('public', 'newrepo', 'public', 'password', images)
+    self.do_push('public', 'newrepo', 'public', 'password')
    self.clearSession()

    # First try to pull the (currently private) repo as devtable, which should fail as it belongs
@ -552,11 +542,7 @@ class RegistryTestsMixin(object):

  def test_pull_private_repo(self):
    # Add a new repository under the devtable user, so we have a real repository to pull.
-    images = {
-      'someid': 'onlyimagehere'
-    }
-
-    self.do_push('devtable', 'newrepo', 'devtable', 'password', images)
+    self.do_push('devtable', 'newrepo', 'devtable', 'password')
    self.clearSession()

    # First try to pull the (currently private) repo as public, which should fail as it belongs
@ -572,11 +558,7 @@ class RegistryTestsMixin(object):
    # Turn off anonymous access.
    with TestFeature(self, 'ANONYMOUS_ACCESS', False):
      # Add a new repository under the public user, so we have a real repository to pull.
-      images = {
-        'someid': 'onlyimagehere'
-      }
-
-      self.do_push('public', 'newrepo', 'public', 'password', images)
+      self.do_push('public', 'newrepo', 'public', 'password')
      self.clearSession()

      # First try to pull the (currently private) repo as devtable, which should fail as it belongs
@ -596,11 +578,7 @@ class RegistryTestsMixin(object):
    # Turn off anonymous access.
    with TestFeature(self, 'ANONYMOUS_ACCESS', False):
      # Add a new repository under the public user, so we have a real repository to pull.
-      images = {
-        'someid': 'onlyimagehere'
-      }
-
-      self.do_push('public', 'newrepo', 'public', 'password', images)
+      self.do_push('public', 'newrepo', 'public', 'password')
      self.clearSession()

      # First try to pull the (currently private) repo as devtable, which should fail as it belongs
@ -615,11 +593,7 @@ class RegistryTestsMixin(object):
    # Turn off anonymous access.
    with TestFeature(self, 'ANONYMOUS_ACCESS', False):
      # Add a new repository under the public user, so we have a real repository to pull.
-      images = {
-        'someid': 'onlyimagehere'
-      }
-
-      self.do_push('public', 'newrepo', 'public', 'password', images)
+      self.do_push('public', 'newrepo', 'public', 'password')
      self.clearSession()

      # First try to pull the (currently private) repo as anonymous, which should fail as it
@ -643,11 +617,7 @@ class RegistryTestsMixin(object):


  def test_create_repo_creator_user(self):
-    images = {
-      'someid': 'onlyimagehere'
-    }
-
-    self.do_push('buynlarge', 'newrepo', 'creator', 'password', images)
+    self.do_push('buynlarge', 'newrepo', 'creator', 'password')

    # Pull the repository as devtable, which should succeed because the repository is owned by the
    # org.
@ -660,11 +630,7 @@ class RegistryTestsMixin(object):
    resp = self.conduct('GET', '/api/v1/organization/buynlarge/robots/ownerbot')
    robot_token = json.loads(resp.text)['token']

-    images = {
-      'someid': 'onlyimagehere'
-    }
-
-    self.do_push('buynlarge', 'newrepo', 'buynlarge+ownerbot', robot_token, images)
+    self.do_push('buynlarge', 'newrepo', 'buynlarge+ownerbot', robot_token)

    # Pull the repository as devtable, which should succeed because the repository is owned by the
    # org.
@ -677,11 +643,7 @@ class RegistryTestsMixin(object):
    resp = self.conduct('GET', '/api/v1/organization/buynlarge/robots/creatorbot')
    robot_token = json.loads(resp.text)['token']

-    images = {
-      'someid': 'onlyimagehere'
-    }
-
-    self.do_push('buynlarge', 'newrepo', 'buynlarge+creatorbot', robot_token, images)
+    self.do_push('buynlarge', 'newrepo', 'buynlarge+creatorbot', robot_token)

    # Pull the repository as devtable, which should succeed because the repository is owned by the
    # org.
@ -697,27 +659,15 @@ class V2RegistryTests(V2RegistryPullMixin, V2RegistryPushMixin, RegistryTestsMix
  """ Tests for V2 registry. """

  def test_invalid_push(self):
-    images = {
-      'someid': 'onlyimagehere'
-    }
-
-    self.do_push('devtable', 'newrepo', 'devtable', 'password', images, invalid=True)
+    self.do_push('devtable', 'newrepo', 'devtable', 'password', invalid=True)

  def test_cancel_push(self):
-    images = {
-      'someid': 'onlyimagehere'
-    }
-
-    self.do_push('devtable', 'newrepo', 'devtable', 'password', images, cancel=True)
+    self.do_push('devtable', 'newrepo', 'devtable', 'password', cancel=True)


  def test_pull_by_checksum(self):
    # Add a new repository under the user, so we have a real repository to pull.
-    images = {
-      'someid': 'onlyimagehere'
-    }
-
-    _, digest = self.do_push('devtable', 'newrepo', 'devtable', 'password', images)
+    _, digest = self.do_push('devtable', 'newrepo', 'devtable', 'password')

    # Attempt to pull by digest.
    self.do_pull('devtable', 'newrepo', 'devtable', 'password', manifest_id=digest)
@ -725,11 +675,7 @@ class V2RegistryTests(V2RegistryPullMixin, V2RegistryPushMixin, RegistryTestsMix

  def test_pull_invalid_image_tag(self):
    # Add a new repository under the user, so we have a real repository to pull.
-    images = {
-      'someid': 'onlyimagehere'
-    }
-
-    self.do_push('devtable', 'newrepo', 'devtable', 'password', images)
+    self.do_push('devtable', 'newrepo', 'devtable', 'password')
    self.clearSession()

    # Attempt to pull the invalid tag.
@ -745,15 +691,16 @@ class V2RegistryTests(V2RegistryPullMixin, V2RegistryPushMixin, RegistryTestsMix
    chunk_count = int(math.ceil((len(contents) * 1.0) / chunksize))
    chunks = [(index * chunksize, (index + 1)*chunksize) for index in range(chunk_count)]

-    images = {
-      'someid': {
+    images = [
+      {
+        'id':'someid',
        'contents': contents,
        'chunks': chunks
      }
-    }
+    ]

    # Push the chunked upload.
-    self.do_push('devtable', 'newrepo', 'devtable', 'password', images)
+    self.do_push('devtable', 'newrepo', 'devtable', 'password', images=images)

    # Pull the image back and verify the contents.
    blobs = self.do_pull('devtable', 'newrepo', 'devtable', 'password')
@ -765,15 +712,16 @@ class V2RegistryTests(V2RegistryPullMixin, V2RegistryPushMixin, RegistryTestsMix
    contents = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(size))
    chunks = [(0, 100), (100, size)]

-    images = {
-      'someid': {
+    images = [
+      {
+        'id':'someid',
        'contents': contents,
        'chunks': chunks
      }
-    }
+    ]

    # Push the chunked upload.
-    self.do_push('devtable', 'newrepo', 'devtable', 'password', images)
+    self.do_push('devtable', 'newrepo', 'devtable', 'password', images=images)

    # Pull the image back and verify the contents.
    blobs = self.do_pull('devtable', 'newrepo', 'devtable', 'password')
@ -786,15 +734,16 @@ class V2RegistryTests(V2RegistryPullMixin, V2RegistryPushMixin, RegistryTestsMix

    chunks = [(0, 100), (10, size)]

-    images = {
-      'someid': {
+    images = [
+      {
+        'id':'someid',
        'contents': contents,
        'chunks': chunks
      }
-    }
+    ]

    # Push the chunked upload.
-    self.do_push('devtable', 'newrepo', 'devtable', 'password', images)
+    self.do_push('devtable', 'newrepo', 'devtable', 'password', images=images)

    # Pull the image back and verify the contents.
    blobs = self.do_pull('devtable', 'newrepo', 'devtable', 'password')
@ -807,22 +756,66 @@ class V2RegistryTests(V2RegistryPullMixin, V2RegistryPushMixin, RegistryTestsMix

    chunks = [(0, 100), (101, size, 416)]

-    images = {
-      'someid': {
+    images = [
+      {
+        'id':'someid',
        'contents': contents,
        'chunks': chunks
      }
-    }
+    ]

    # Attempt to push the chunked upload, which should fail.
-    self.do_push('devtable', 'newrepo', 'devtable', 'password', images)
+    self.do_push('devtable', 'newrepo', 'devtable', 'password', images=images)
+
+  def test_multiple_layers_invalid(self):
+    # Attempt to push a manifest with an image depending on an unknown base layer.
+    images = [
+      {
+        'id': 'latestid',
+        'contents': 'the latest image',
+        'parent': 'baseid',
+      }
+    ]
+
+    self.do_push('devtable', 'newrepo', 'devtable', 'password', images=images,
+                 expected_manifest_code=400)
+
+  def test_multiple_layers(self):
+    # Push a manifest with multiple layers.
+    images = [
+      {
+        'id': 'latestid',
+        'contents': 'the latest image',
+        'parent': 'baseid',
+      },
+      {
+        'id': 'baseid',
+        'contents': 'The base image',
+      }
+    ]
+
+    self.do_push('devtable', 'newrepo', 'devtable', 'password', images=images)

  def test_multiple_tags(self):
+    latest_images = [
+      {
+        'id': 'latestid',
+        'contents': 'the latest image'
+      }
+    ]
+
+    foobar_images = [
+      {
+        'id': 'foobarid',
+        'contents': 'the foobar image',
+      }
+    ]
+
    # Create the repo.
-    self.do_push('devtable', 'newrepo', 'devtable', 'password', {'someid': 'onlyimagehere'},
+    self.do_push('devtable', 'newrepo', 'devtable', 'password', images=latest_images,
                 tag_name='latest')

-    self.do_push('devtable', 'newrepo', 'devtable', 'password', {'anotherid': 'anotherimage'},
+    self.do_push('devtable', 'newrepo', 'devtable', 'password', images=foobar_images,
                 tag_name='foobar')

    # Retrieve the tags.
@ -846,6 +839,7 @@ class V2RegistryTests(V2RegistryPullMixin, V2RegistryPushMixin, RegistryTestsMix
    self.conduct('GET', '/v2/devtable/doesnotexist/tags/list', auth='jwt', expected_code=401)


+
 class V1PushV2PullRegistryTests(V2RegistryPullMixin, V1RegistryPushMixin, RegistryTestsMixin,
                                RegistryTestCaseMixin, LiveServerTestCase):
  """ Tests for V1 push, V2 pull registry. """