From f07b940bc57dc763a296b74c0ad9d63737bf2f81 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Thu, 3 Dec 2015 16:19:22 -0500 Subject: [PATCH] Optimize blob lookup Fixes #1013 --- data/model/blob.py | 34 +++++++++++++++------------------- data/model/storage.py | 11 ++++++++++- test/test_queries.py | 21 ++++++++++++++++++++- 3 files changed, 45 insertions(+), 21 deletions(-) diff --git a/data/model/blob.py b/data/model/blob.py index 5b8d38420..b05ce6100 100644 --- a/data/model/blob.py +++ b/data/model/blob.py @@ -1,6 +1,7 @@ from uuid import uuid4 -from data.model import tag, _basequery, BlobDoesNotExist, InvalidBlobUpload, db_transaction +from data.model import (tag, _basequery, BlobDoesNotExist, InvalidBlobUpload, db_transaction, + storage as storage_model, InvalidImageException) from data.database import (Repository, Namespace, ImageStorage, Image, ImageStorageLocation, ImageStoragePlacement, BlobUpload) @@ -8,26 +9,21 @@ from data.database import (Repository, Namespace, ImageStorage, Image, ImageStor def get_repo_blob_by_digest(namespace, repo_name, blob_digest): """ Find the content-addressable blob linked to the specified repository. """ - placements = list(ImageStoragePlacement - .select(ImageStoragePlacement, ImageStorage, ImageStorageLocation) - .join(ImageStorageLocation) - .switch(ImageStoragePlacement) - .join(ImageStorage) - .join(Image) - .join(Repository) - .join(Namespace, on=(Namespace.id == Repository.namespace_user)) - .where(Repository.name == repo_name, Namespace.username == namespace, - ImageStorage.content_checksum == blob_digest, - ImageStorage.uploading == False)) - if not placements: + try: + storage_id_query = (ImageStorage + .select(ImageStorage.id) + .join(Image) + .join(Repository) + .join(Namespace, on=(Namespace.id == Repository.namespace_user)) + .where(Repository.name == repo_name, Namespace.username == namespace, + ImageStorage.content_checksum == blob_digest, + ImageStorage.uploading == False) + .limit(1)) + + return storage_model.get_storage_by_subquery(storage_id_query) + except InvalidImageException: raise BlobDoesNotExist('Blob does not exist with digest: {0}'.format(blob_digest)) - found = placements[0].storage - found.locations = {placement.location.name for placement in placements - if placement.storage.id == found.id} - - return found - def store_blob_record_and_temp_link(namespace, repo_name, blob_digest, location_obj, byte_count, link_expiration_s): diff --git a/data/model/storage.py b/data/model/storage.py index 5cc18b61f..f144d157a 100644 --- a/data/model/storage.py +++ b/data/model/storage.py @@ -131,6 +131,15 @@ def _get_storage(query_modifier): return found +def get_storage_by_subquery(subquery): + """ Returns the storage (and its locations) for the storage id returned by the subquery. The + subquery must return at most 1 result, which is a storage ID. """ + def filter_by_subquery(query): + return query.where(ImageStorage.id == subquery) + + return _get_storage(filter_by_subquery) + + def get_storage_by_uuid(storage_uuid): def filter_to_uuid(query): return query.where(ImageStorage.uuid == storage_uuid) @@ -202,7 +211,7 @@ def get_storage_locations(uuid): .select() .join(ImageStorageLocation) .switch(ImageStoragePlacement) - .join(ImageStorage, JOIN_LEFT_OUTER) + .join(ImageStorage) .where(ImageStorage.uuid == uuid)) return [location.location.name for location in query] diff --git a/test/test_queries.py b/test/test_queries.py index d538bea74..17c383028 100644 --- a/test/test_queries.py +++ b/test/test_queries.py @@ -3,7 +3,7 @@ import unittest from app import app from initdb import setup_database_for_testing, finished_database_for_testing from data import model -from data.database import RepositoryBuild +from data.database import RepositoryBuild, Repository, Image, ImageStorage ADMIN_ACCESS_USER = 'devtable' SIMPLE_REPO = 'simple' @@ -45,5 +45,24 @@ class TestSpecificQueries(unittest.TestCase): self.assertEquals(created.id, result.id) self.assertEquals(created.uuid, result.uuid) + def test_lookup_repo_blob(self): + repo = model.repository.get_repository(ADMIN_ACCESS_USER, SIMPLE_REPO) + expected = list(ImageStorage.select().join(Image).where(Image.repository == repo)) + self.assertTrue(len(expected) > 0) + + for storage in expected: + found = model.blob.get_repo_blob_by_digest(ADMIN_ACCESS_USER, SIMPLE_REPO, + storage.content_checksum) + self.assertEquals(found.id, storage.id) + + try: + model.blob.get_repo_blob_by_digest(ADMIN_ACCESS_USER, SIMPLE_REPO, 'invalidchecksum') + except model.BlobDoesNotExist: + return + + self.fail('Expected BlobDoesNotExist exception') + + + if __name__ == '__main__': unittest.main() \ No newline at end of file