Optimize blob lookup

Fixes #1013
This commit is contained in:
Joseph Schorr 2015-12-03 16:19:22 -05:00
parent 597d6ecd3c
commit f07b940bc5
3 changed files with 45 additions and 21 deletions

View file

@ -1,6 +1,7 @@
from uuid import uuid4
from data.model import tag, _basequery, BlobDoesNotExist, InvalidBlobUpload, db_transaction
from data.model import (tag, _basequery, BlobDoesNotExist, InvalidBlobUpload, db_transaction,
storage as storage_model, InvalidImageException)
from data.database import (Repository, Namespace, ImageStorage, Image, ImageStorageLocation,
ImageStoragePlacement, BlobUpload)
@ -8,26 +9,21 @@ from data.database import (Repository, Namespace, ImageStorage, Image, ImageStor
def get_repo_blob_by_digest(namespace, repo_name, blob_digest):
""" Find the content-addressable blob linked to the specified repository.

The lookup matches ImageStorage rows whose content_checksum equals blob_digest and whose
uploading flag is False, joined through Image and Repository to the Namespace whose username
is namespace and the Repository whose name is repo_name.

The subquery-based path selects only ImageStorage.id (limited to one row) and returns the
result of storage_model.get_storage_by_subquery for it. If that call raises
InvalidImageException, BlobDoesNotExist is raised with the digest in its message.

NOTE(review): this span appears to interleave the earlier placements-based lines with the
newer subquery-based lines of the change; confirm which lines are live before relying on it.
"""
placements = list(ImageStoragePlacement
.select(ImageStoragePlacement, ImageStorage, ImageStorageLocation)
.join(ImageStorageLocation)
.switch(ImageStoragePlacement)
.join(ImageStorage)
try:
storage_id_query = (ImageStorage
.select(ImageStorage.id)
.join(Image)
.join(Repository)
.join(Namespace, on=(Namespace.id == Repository.namespace_user))
.where(Repository.name == repo_name, Namespace.username == namespace,
ImageStorage.content_checksum == blob_digest,
ImageStorage.uploading == False))
if not placements:
ImageStorage.uploading == False)
.limit(1))
return storage_model.get_storage_by_subquery(storage_id_query)
except InvalidImageException:
raise BlobDoesNotExist('Blob does not exist with digest: {0}'.format(blob_digest))
found = placements[0].storage
found.locations = {placement.location.name for placement in placements
if placement.storage.id == found.id}
return found
def store_blob_record_and_temp_link(namespace, repo_name, blob_digest, location_obj, byte_count,
link_expiration_s):

View file

@ -131,6 +131,15 @@ def _get_storage(query_modifier):
return found
def get_storage_by_subquery(subquery):
    """ Look up a single storage through a subquery that yields its id.

    `subquery` must produce at most one result, which is a storage ID. It is compared against
    ImageStorage.id in the WHERE clause applied to the query that _get_storage builds.

    Returns whatever _get_storage returns for that filter (the storage and its locations).
    """
    def restrict_to_subquery_id(storage_query):
        # The subquery is embedded directly in the comparison rather than evaluated first.
        id_matches = (ImageStorage.id == subquery)
        return storage_query.where(id_matches)

    return _get_storage(restrict_to_subquery_id)
def get_storage_by_uuid(storage_uuid):
def filter_to_uuid(query):
return query.where(ImageStorage.uuid == storage_uuid)
@ -202,7 +211,7 @@ def get_storage_locations(uuid):
.select()
.join(ImageStorageLocation)
.switch(ImageStoragePlacement)
.join(ImageStorage, JOIN_LEFT_OUTER)
.join(ImageStorage)
.where(ImageStorage.uuid == uuid))
return [location.location.name for location in query]

View file

@ -3,7 +3,7 @@ import unittest
from app import app
from initdb import setup_database_for_testing, finished_database_for_testing
from data import model
from data.database import RepositoryBuild
from data.database import RepositoryBuild, Repository, Image, ImageStorage
ADMIN_ACCESS_USER = 'devtable'
SIMPLE_REPO = 'simple'
@ -45,5 +45,24 @@ class TestSpecificQueries(unittest.TestCase):
self.assertEquals(created.id, result.id)
self.assertEquals(created.uuid, result.uuid)
def test_lookup_repo_blob(self):
    """ Verify that get_repo_blob_by_digest resolves each storage of the simple repository by
        its content checksum, and raises BlobDoesNotExist for an unknown digest.
    """
    repo = model.repository.get_repository(ADMIN_ACCESS_USER, SIMPLE_REPO)
    expected = list(ImageStorage.select().join(Image).where(Image.repository == repo))

    # Guard against a vacuous pass: the loop below checks nothing if no storages exist.
    self.assertTrue(expected, 'Expected at least one storage in the test repository')

    for storage in expected:
        found = model.blob.get_repo_blob_by_digest(ADMIN_ACCESS_USER, SIMPLE_REPO,
                                                   storage.content_checksum)
        self.assertEqual(found.id, storage.id)

    # A digest that matches no storage must surface as BlobDoesNotExist.
    with self.assertRaises(model.BlobDoesNotExist):
        model.blob.get_repo_blob_by_digest(ADMIN_ACCESS_USER, SIMPLE_REPO, 'invalidchecksum')
# Run the test suite when this module is executed directly.
if __name__ == '__main__':
unittest.main()