Enable caching of blobs in the V2 registry protocol to avoid DB connections once the cache has been loaded

This should help with bursty pull traffic, as it avoids DB connections on a large percentage of requests
Joseph Schorr 2017-12-14 13:38:24 -05:00
parent db6007cb37
commit b2485934ed
5 changed files with 112 additions and 13 deletions
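
The change follows a cache-aside pattern: each blob lookup first consults a process-local model cache keyed by namespace, repository, and digest, and only falls through to the database on a miss. A minimal sketch of that pattern, assuming a dict-backed cache; the class and method names below are illustrative, not Quay's actual implementation:

  # Illustrative cache-aside sketch; not Quay's actual cache class.
  class SimpleDataModelCache(object):
    def __init__(self):
      self._entries = {}

    def retrieve(self, key, loader):
      # Return the cached value for `key`, invoking `loader` (the DB
      # query) only when the key has not been cached yet.
      if key in self._entries:
        return self._entries[key]

      value = loader()
      if value is not None:
        self._entries[key] = value
      return value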

endpoints/v2/blob.py

@@ -7,9 +7,10 @@ from flask import url_for, request, redirect, Response, abort as flask_abort
 import bitmath
 import resumablehashlib
 
-from app import storage, app, get_app_url, metric_queue
+from app import storage, app, get_app_url, metric_queue, model_cache
 from auth.registry_jwt_auth import process_registry_jwt_auth
 from data import database
+from data.cache import cache_key
 from digest import digest_tools
 from endpoints.decorators import anon_protect, parse_repository_name
 from endpoints.v2 import v2_bp, require_repo_read, require_repo_write, get_input_stream
@@ -33,6 +34,18 @@ class _InvalidRangeHeader(Exception):
   pass
 
 
+def _get_repository_blob(namespace_name, repo_name, digest):
+  """ Returns the blob with the given digest under the repository with the given
+      name. If one does not exist (or it is still uploading), returns None.
+      Automatically handles caching.
+  """
+  def load_blob():
+    return model.get_blob_by_digest(namespace_name, repo_name, digest)
+
+  blob_cache_key = cache_key.for_repository_blob(namespace_name, repo_name, digest)
+  return model_cache.retrieve(blob_cache_key, load_blob)
+
+
 @v2_bp.route(BLOB_DIGEST_ROUTE, methods=['HEAD'])
 @parse_repository_name()
 @process_registry_jwt_auth(scopes=['pull'])
@@ -41,7 +54,7 @@ class _InvalidRangeHeader(Exception):
 @cache_control(max_age=31536000)
 def check_blob_exists(namespace_name, repo_name, digest):
   # Find the blob.
-  blob = model.get_blob_by_digest(namespace_name, repo_name, digest)
+  blob = _get_repository_blob(namespace_name, repo_name, digest)
   if blob is None:
     raise BlobUnknown()
@@ -49,7 +62,8 @@ def check_blob_exists(namespace_name, repo_name, digest):
   headers = {
     'Docker-Content-Digest': digest,
     'Content-Length': blob.size,
-    'Content-Type': BLOB_CONTENT_TYPE,}
+    'Content-Type': BLOB_CONTENT_TYPE,
+  }
 
   # If our storage supports range requests, let the client know.
   if storage.get_supports_resumable_downloads(blob.locations):
@@ -67,7 +81,7 @@ def check_blob_exists(namespace_name, repo_name, digest):
 @cache_control(max_age=31536000)
 def download_blob(namespace_name, repo_name, digest):
   # Find the blob.
-  blob = model.get_blob_by_digest(namespace_name, repo_name, digest)
+  blob = _get_repository_blob(namespace_name, repo_name, digest)
   if blob is None:
     raise BlobUnknown()

endpoints/v2/test/test_blob.py

@@ -0,0 +1,59 @@
+import hashlib
+import pytest
+
+from mock import patch
+from flask import url_for
+from playhouse.test_utils import assert_query_count
+
+from app import instance_keys, app as realapp
+from data import model
+from data.cache import InMemoryDataModelCache
+from data.database import ImageStorageLocation
+from endpoints.test.shared import conduct_call
+from util.security.registry_jwt import generate_bearer_token, build_context_and_subject
+from test.fixtures import *
+
+@pytest.mark.parametrize('method, endpoint', [
+  ('GET', 'download_blob'),
+  ('HEAD', 'check_blob_exists'),
+])
+def test_blob_caching(method, endpoint, client, app):
+  digest = 'sha256:' + hashlib.sha256("a").hexdigest()
+  location = ImageStorageLocation.get(name='local_us')
+  model.blob.store_blob_record_and_temp_link('devtable', 'simple', digest, location, 1, 10000000)
+
+  params = {
+    'repository': 'devtable/simple',
+    'digest': digest,
+  }
+
+  user = model.user.get_user('devtable')
+  access = [{
+    'type': 'repository',
+    'name': 'devtable/simple',
+    'actions': ['pull'],
+  }]
+
+  context, subject = build_context_and_subject(user=user)
+  token = generate_bearer_token(realapp.config['SERVER_HOSTNAME'], subject, context, access, 600,
+                                instance_keys)
+
+  headers = {
+    'Authorization': 'Bearer %s' % token,
+  }
+
+  # Run without caching to make sure the request works. This also preloads some of
+  # our global model caches.
+  conduct_call(client, 'v2.' + endpoint, url_for, method, params, expected_code=200,
+               headers=headers)
+
+  with patch('endpoints.v2.blob.model_cache', InMemoryDataModelCache()):
+    # First request should make a DB query to retrieve the blob.
+    with assert_query_count(1):
+      conduct_call(client, 'v2.' + endpoint, url_for, method, params, expected_code=200,
+                   headers=headers)
+
+    # Subsequent requests should use the cached blob.
+    with assert_query_count(0):
+      conduct_call(client, 'v2.' + endpoint, url_for, method, params, expected_code=200,
+                   headers=headers)
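
The test's shape generalizes to any cache-aside helper: swap in a fresh cache, then assert that the first call costs exactly one DB query and later calls cost none. A self-contained sketch of that same assertion pattern, without Quay's fixtures and with all names hypothetical:

  def test_cache_aside_loads_once():
    entries = {}
    calls = []

    def retrieve(key, loader):
      # Minimal stand-in for model_cache.retrieve: a dict-backed cache-aside.
      if key not in entries:
        entries[key] = loader()
      return entries[key]

    def loader():
      calls.append(1)  # Stands in for the real DB query.
      return 'blob-record'

    assert retrieve('some-key', loader) == 'blob-record'
    assert retrieve('some-key', loader) == 'blob-record'
    assert len(calls) == 1  # The loader ran only on the first retrieval.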