Optimize blob lookup

Fixes #1013
This commit is contained in:
Joseph Schorr 2015-12-03 16:19:22 -05:00
parent 597d6ecd3c
commit f07b940bc5
3 changed files with 45 additions and 21 deletions

View file

@ -1,6 +1,7 @@
from uuid import uuid4 from uuid import uuid4
from data.model import tag, _basequery, BlobDoesNotExist, InvalidBlobUpload, db_transaction from data.model import (tag, _basequery, BlobDoesNotExist, InvalidBlobUpload, db_transaction,
storage as storage_model, InvalidImageException)
from data.database import (Repository, Namespace, ImageStorage, Image, ImageStorageLocation, from data.database import (Repository, Namespace, ImageStorage, Image, ImageStorageLocation,
ImageStoragePlacement, BlobUpload) ImageStoragePlacement, BlobUpload)
@ -8,26 +9,21 @@ from data.database import (Repository, Namespace, ImageStorage, Image, ImageStor
def get_repo_blob_by_digest(namespace, repo_name, blob_digest):
    """ Look up the blob with the given digest that is linked to the given repository.

    Builds a subquery selecting the ID of at most one ImageStorage row that is not
    marked as uploading, whose content checksum equals `blob_digest`, and which is
    joined through Image to the repository `repo_name` under `namespace`. The
    subquery is then resolved by `storage_model.get_storage_by_subquery`, whose
    result is returned unchanged.

    Raises BlobDoesNotExist if that lookup raises InvalidImageException.
    """
    blob_filter = (
        Repository.name == repo_name,
        Namespace.username == namespace,
        ImageStorage.content_checksum == blob_digest,
        ImageStorage.uploading == False,  # noqa: E712 -- ORM column comparison, not a truth test
    )
    matching_storage_id = (ImageStorage
                           .select(ImageStorage.id)
                           .join(Image)
                           .join(Repository)
                           .join(Namespace, on=(Namespace.id == Repository.namespace_user))
                           .where(*blob_filter)
                           .limit(1))

    try:
        return storage_model.get_storage_by_subquery(matching_storage_id)
    except InvalidImageException:
        raise BlobDoesNotExist('Blob does not exist with digest: {0}'.format(blob_digest))
def store_blob_record_and_temp_link(namespace, repo_name, blob_digest, location_obj, byte_count, def store_blob_record_and_temp_link(namespace, repo_name, blob_digest, location_obj, byte_count,
link_expiration_s): link_expiration_s):

View file

@ -131,6 +131,15 @@ def _get_storage(query_modifier):
return found return found
def get_storage_by_subquery(subquery):
    """ Looks up the storage (and its locations) whose ID is produced by the given subquery.

    The subquery must yield at most one result, and that result must be a storage ID.
    The lookup itself is delegated to `_get_storage`, which receives a modifier that
    restricts its query to the row whose ID equals the subquery's result.
    """
    return _get_storage(lambda query: query.where(ImageStorage.id == subquery))
def get_storage_by_uuid(storage_uuid): def get_storage_by_uuid(storage_uuid):
def filter_to_uuid(query): def filter_to_uuid(query):
return query.where(ImageStorage.uuid == storage_uuid) return query.where(ImageStorage.uuid == storage_uuid)
@ -202,7 +211,7 @@ def get_storage_locations(uuid):
.select() .select()
.join(ImageStorageLocation) .join(ImageStorageLocation)
.switch(ImageStoragePlacement) .switch(ImageStoragePlacement)
.join(ImageStorage, JOIN_LEFT_OUTER) .join(ImageStorage)
.where(ImageStorage.uuid == uuid)) .where(ImageStorage.uuid == uuid))
return [location.location.name for location in query] return [location.location.name for location in query]

View file

@ -3,7 +3,7 @@ import unittest
from app import app from app import app
from initdb import setup_database_for_testing, finished_database_for_testing from initdb import setup_database_for_testing, finished_database_for_testing
from data import model from data import model
from data.database import RepositoryBuild from data.database import RepositoryBuild, Repository, Image, ImageStorage
ADMIN_ACCESS_USER = 'devtable' ADMIN_ACCESS_USER = 'devtable'
SIMPLE_REPO = 'simple' SIMPLE_REPO = 'simple'
@ -45,5 +45,24 @@ class TestSpecificQueries(unittest.TestCase):
self.assertEquals(created.id, result.id) self.assertEquals(created.id, result.id)
self.assertEquals(created.uuid, result.uuid) self.assertEquals(created.uuid, result.uuid)
def test_lookup_repo_blob(self):
    """ Checks blob lookup by digest against the simple repository.

    Every storage joined to an image in the repository must be returned by
    `get_repo_blob_by_digest` when queried with its own content checksum, and a
    checksum that matches nothing must raise BlobDoesNotExist.
    """
    repo = model.repository.get_repository(ADMIN_ACCESS_USER, SIMPLE_REPO)
    expected = list(ImageStorage.select().join(Image).where(Image.repository == repo))
    # Guard against a vacuous pass: the loop below asserts nothing if this is empty.
    self.assertTrue(len(expected) > 0)

    for storage in expected:
        found = model.blob.get_repo_blob_by_digest(ADMIN_ACCESS_USER, SIMPLE_REPO,
                                                   storage.content_checksum)
        self.assertEquals(found.id, storage.id)

    # assertRaises fails the test on its own if the exception is not raised, so no
    # manual try/except/self.fail bookkeeping is needed.
    with self.assertRaises(model.BlobDoesNotExist):
        model.blob.get_repo_blob_by_digest(ADMIN_ACCESS_USER, SIMPLE_REPO, 'invalidchecksum')
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()