Change lookup of blobs to not use a subquery, which is murder on MySQL

This commit is contained in:
Joseph Schorr 2019-01-03 14:59:24 -05:00
parent cdb49dbfd3
commit e309508776
4 changed files with 31 additions and 40 deletions

View file

@@ -16,16 +16,16 @@ def get_repository_blob_by_digest(repository, blob_digest):
""" Find the content-addressable blob linked to the specified repository. """ Find the content-addressable blob linked to the specified repository.
""" """
try: try:
storage_id_query = (ImageStorage storage = (ImageStorage
.select(ImageStorage.id) .select(ImageStorage.uuid)
.join(Image) .join(Image)
.where(Image.repository == repository, .where(Image.repository == repository,
ImageStorage.content_checksum == blob_digest, ImageStorage.content_checksum == blob_digest,
ImageStorage.uploading == False) ImageStorage.uploading == False)
.limit(1)) .get())
return storage_model.get_storage_by_subquery(storage_id_query) return storage_model.get_storage_by_uuid(storage.uuid)
except InvalidImageException: except (ImageStorage.DoesNotExist, InvalidImageException):
raise BlobDoesNotExist('Blob does not exist with digest: {0}'.format(blob_digest)) raise BlobDoesNotExist('Blob does not exist with digest: {0}'.format(blob_digest))
@@ -33,18 +33,18 @@ def get_repo_blob_by_digest(namespace, repo_name, blob_digest):
""" Find the content-addressable blob linked to the specified repository. """ Find the content-addressable blob linked to the specified repository.
""" """
try: try:
storage_id_query = (ImageStorage storage = (ImageStorage
.select(ImageStorage.id) .select(ImageStorage.uuid)
.join(Image) .join(Image)
.join(Repository) .join(Repository)
.join(Namespace, on=(Namespace.id == Repository.namespace_user)) .join(Namespace, on=(Namespace.id == Repository.namespace_user))
.where(Repository.name == repo_name, Namespace.username == namespace, .where(Repository.name == repo_name, Namespace.username == namespace,
ImageStorage.content_checksum == blob_digest, ImageStorage.content_checksum == blob_digest,
ImageStorage.uploading == False) ImageStorage.uploading == False)
.limit(1)) .get())
return storage_model.get_storage_by_subquery(storage_id_query) return storage_model.get_storage_by_uuid(storage.uuid)
except InvalidImageException: except (ImageStorage.DoesNotExist, InvalidImageException):
raise BlobDoesNotExist('Blob does not exist with digest: {0}'.format(blob_digest)) raise BlobDoesNotExist('Blob does not exist with digest: {0}'.format(blob_digest))

View file

@@ -1,6 +1,6 @@
from data.database import ImageStorage, ManifestBlob from data.database import ImageStorage, ManifestBlob
from data.model import BlobDoesNotExist from data.model import BlobDoesNotExist
from data.model.storage import get_storage_by_subquery, InvalidImageException from data.model.storage import get_storage_by_uuid, InvalidImageException
from data.model.blob import get_repository_blob_by_digest as legacy_get from data.model.blob import get_repository_blob_by_digest as legacy_get
def get_repository_blob_by_digest(repository, blob_digest): def get_repository_blob_by_digest(repository, blob_digest):
@@ -8,16 +8,16 @@ def get_repository_blob_by_digest(repository, blob_digest):
returns it or None if none. returns it or None if none.
""" """
try: try:
storage_id_query = (ImageStorage storage = (ImageStorage
.select(ImageStorage.id) .select(ImageStorage.uuid)
.join(ManifestBlob) .join(ManifestBlob)
.where(ManifestBlob.repository == repository, .where(ManifestBlob.repository == repository,
ImageStorage.content_checksum == blob_digest, ImageStorage.content_checksum == blob_digest,
ImageStorage.uploading == False) ImageStorage.uploading == False)
.limit(1)) .get())
return get_storage_by_subquery(storage_id_query) return get_storage_by_uuid(storage.uuid)
except InvalidImageException: except (ImageStorage.DoesNotExist, InvalidImageException):
# TODO(jschorr): Remove once we are no longer using the legacy tables. # TODO(jschorr): Remove once we are no longer using the legacy tables.
# Try the legacy call. # Try the legacy call.
try: try:

View file

@@ -233,15 +233,6 @@ def _get_storage(query_modifier):
return found return found
def get_storage_by_subquery(subquery):
""" Returns the storage (and its locations) for the storage id returned by the subquery. The
subquery must return at most 1 result, which is a storage ID. """
def filter_by_subquery(query):
return query.where(ImageStorage.id == subquery)
return _get_storage(filter_by_subquery)
def get_storage_by_uuid(storage_uuid): def get_storage_by_uuid(storage_uuid):
def filter_to_uuid(query): def filter_to_uuid(query):
return query.where(ImageStorage.uuid == storage_uuid) return query.where(ImageStorage.uuid == storage_uuid)

View file

@@ -50,7 +50,7 @@ def test_blob_caching(method, endpoint, client, app):
with patch('endpoints.v2.blob.model_cache', InMemoryDataModelCache()): with patch('endpoints.v2.blob.model_cache', InMemoryDataModelCache()):
# First request should make a DB query to retrieve the blob. # First request should make a DB query to retrieve the blob.
with assert_query_count(3): with assert_query_count(4):
conduct_call(client, 'v2.' + endpoint, url_for, method, params, expected_code=200, conduct_call(client, 'v2.' + endpoint, url_for, method, params, expected_code=200,
headers=headers) headers=headers)