Switch Quay to using an in-container memcached for data model caching

This commit is contained in:
Joseph Schorr 2017-12-19 17:13:37 -05:00
parent 5cecc54bd5
commit ab0172d2fd
14 changed files with 246 additions and 65 deletions

View file

@ -2,6 +2,8 @@
FROM quay.io/quay/quay-base:latest
RUN adduser memcached --disabled-login --system
WORKDIR $QUAYDIR
COPY requirements.txt requirements-tests.txt ./

6
app.py
View file

@ -22,7 +22,7 @@ from data import model
from data.archivedlogs import LogArchive
from data.billing import Billing
from data.buildlogs import BuildLogs
from data.cache import InMemoryDataModelCache
from data.cache import get_model_cache
from data.model.user import LoginWrappedDBUser
from data.queue import WorkQueue, BuildMetricQueueReporter
from data.userevent import UserEventsBuilderModule
@ -178,9 +178,7 @@ Principal(app, use_sessions=False)
tf = app.config['DB_TRANSACTION_FACTORY']
# TODO(jschorr): make this configurable
model_cache = InMemoryDataModelCache()
model_cache = get_model_cache(app.config)
avatar = Avatar(app)
login_manager = LoginManager(app)
mail = Mail(app)

View file

@ -0,0 +1,7 @@
#!/bin/sh

# The logger depends on syslog-ng; bail out (non-zero) if it is not up yet.
if ! sv check syslog-ng > /dev/null; then
  exit 1
fi

# Replace this shell with the logger, tagging entries as coming from memcached.
exec logger -i -t memcached

View file

@ -0,0 +1,12 @@
#! /bin/bash

echo 'Starting memcached'

# Extra verbosity is only requested when DEBUGLOG is exactly "true".
verbose_flags=()
if [ "$DEBUGLOG" == "true" ]
then
  verbose_flags=(-vv)
fi

# Run as the `memcached` user with 64MB of memory, bound to loopback on port 18080.
memcached -u memcached -m 64 "${verbose_flags[@]}" -l 127.0.0.1 -p 18080

echo 'memcached exited'

View file

@ -513,3 +513,9 @@ class DefaultConfig(ImmutableConfig):
# For Billing Support Only: The number of allowed builds on a namespace that has been billed
# successfully.
BILLED_NAMESPACE_MAXIMUM_BUILD_COUNT = None
# Configuration for the data model cache.
DATA_MODEL_CACHE_CONFIG = {
'engine': 'memcached',
'endpoint': ('127.0.0.1', 18080),
}

View file

@ -1,62 +1,23 @@
import logging
from data.cache.impl import NoopDataModelCache, InMemoryDataModelCache, MemcachedModelCache
from datetime import datetime
def get_model_cache(config):
""" Returns a data model cache matching the given configuration. """
cache_config = config.get('DATA_MODEL_CACHE_CONFIG', {})
engine = cache_config.get('engine', 'noop')
from abc import ABCMeta, abstractmethod
from six import add_metaclass
if engine == 'noop':
return NoopDataModelCache()
from util.expiresdict import ExpiresDict
from util.timedeltastring import convert_to_timedelta
if engine == 'inmemory':
return InMemoryDataModelCache()
logger = logging.getLogger(__name__)
if engine == 'memcached':
endpoint = cache_config.get('endpoint', None)
if endpoint is None:
raise Exception('Missing `endpoint` for memcached model cache configuration')
def is_not_none(value):
return value is not None
timeout = cache_config.get('timeout')
connect_timeout = cache_config.get('connect_timeout')
return MemcachedModelCache(endpoint, timeout=timeout, connect_timeout=connect_timeout)
@add_metaclass(ABCMeta)
class DataModelCache(object):
""" Defines an interface for cache storing and returning tuple data model objects. """
@abstractmethod
def retrieve(self, cache_key, loader, should_cache=is_not_none):
""" Checks the cache for the specified cache key and returns the value found (if any). If none
found, the loader is called to get a result and populate the cache.
"""
pass
class NoopDataModelCache(DataModelCache):
""" Implementation of the data model cache which does nothing. """
def retrieve(self, cache_key, loader, should_cache=is_not_none):
return loader()
class InMemoryDataModelCache(DataModelCache):
""" Implementation of the data model cache backed by an in-memory dictionary. """
def __init__(self):
self.cache = ExpiresDict(rebuilder=lambda: {})
def retrieve(self, cache_key, loader, should_cache=is_not_none):
not_found = [None]
logger.debug('Checking cache for key %s', cache_key.key)
result = self.cache.get(cache_key.key, default_value=not_found)
if result != not_found:
logger.debug('Found result in cache for key %s: %s', cache_key.key, result)
return result
logger.debug('Found no result in cache for key %s; calling loader', cache_key.key)
result = loader()
logger.debug('Got loaded result for key %s: %s', cache_key.key, result)
if should_cache(result):
logger.debug('Caching loaded result for key %s with expiration %s: %s', cache_key.key,
result, cache_key.expiration)
expires = convert_to_timedelta(cache_key.expiration) + datetime.now()
self.cache.set(cache_key.key, result, expires=expires)
logger.debug('Cached loaded result for key %s with expiration %s: %s', cache_key.key,
result, cache_key.expiration)
else:
logger.debug('Not caching loaded result for key %s: %s', cache_key.key, result)
return result
raise Exception('Unknown model cache engine `%s`' % engine)

View file

@ -5,4 +5,4 @@ class CacheKey(namedtuple('CacheKey', ['key', 'expiration'])):
pass
def for_repository_blob(namespace_name, repo_name, digest):
return CacheKey('repository_blob:%s:%s:%s' % (namespace_name, repo_name, digest), '60s')
return CacheKey('repository_blob__%s_%s_%s' % (namespace_name, repo_name, digest), '60s')

146
data/cache/impl.py vendored Normal file
View file

@ -0,0 +1,146 @@
import logging
import json
from datetime import datetime
from abc import ABCMeta, abstractmethod
from six import add_metaclass
from pymemcache.client.base import Client
from util.expiresdict import ExpiresDict
from util.timedeltastring import convert_to_timedelta
logger = logging.getLogger(__name__)
def is_not_none(value):
  """ Returns True for any value other than None; used as the default `should_cache` check. """
  return not (value is None)
@add_metaclass(ABCMeta)
class DataModelCache(object):
  """ Abstract interface for caches that store and return tuple data model objects. """

  @abstractmethod
  def retrieve(self, cache_key, loader, should_cache=is_not_none):
    """ Looks up `cache_key` in the cache and returns the value found, if any. When nothing is
        found, `loader` is invoked to produce the result and the cache is populated with it.
        `should_cache` is a predicate over the loaded result; it defaults to `is_not_none`.
    """
class NoopDataModelCache(DataModelCache):
  """ Data model cache implementation that never caches anything. """

  def retrieve(self, cache_key, loader, should_cache=is_not_none):
    """ Ignores `cache_key` and `should_cache`; always calls `loader` and returns its result. """
    loaded = loader()
    return loaded
class InMemoryDataModelCache(DataModelCache):
  """ Implementation of the data model cache backed by an in-memory dictionary.

      Values are held in an ExpiresDict and stored with an absolute expiry computed from the
      cache key's expiration string at the time they are cached.
  """
  def __init__(self):
    self.cache = ExpiresDict(rebuilder=lambda: {})

  def retrieve(self, cache_key, loader, should_cache=is_not_none):
    """ Returns the cached value for `cache_key.key` if present; otherwise calls `loader`,
        stores its result when `should_cache(result)` is truthy, and returns that result.
    """
    # Passed as the lookup default so a miss can be told apart from a cached value. Note that
    # the comparison below is by equality, so a cached value equal to `[None]` reads as a miss.
    not_found = [None]
    logger.debug('Checking cache for key %s', cache_key.key)
    result = self.cache.get(cache_key.key, default_value=not_found)
    if result != not_found:
      logger.debug('Found result in cache for key %s: %s', cache_key.key, result)
      return result

    logger.debug('Found no result in cache for key %s; calling loader', cache_key.key)
    result = loader()
    logger.debug('Got loaded result for key %s: %s', cache_key.key, result)

    if should_cache(result):
      # Arguments follow the order of the placeholders: key, expiration, then the result.
      logger.debug('Caching loaded result for key %s with expiration %s: %s', cache_key.key,
                   cache_key.expiration, result)
      expires = convert_to_timedelta(cache_key.expiration) + datetime.now()
      self.cache.set(cache_key.key, result, expires=expires)
      logger.debug('Cached loaded result for key %s with expiration %s: %s', cache_key.key,
                   cache_key.expiration, result)
    else:
      logger.debug('Not caching loaded result for key %s: %s', cache_key.key, result)

    return result
_DEFAULT_MEMCACHE_TIMEOUT = 1 # second
_DEFAULT_MEMCACHE_CONNECT_TIMEOUT = 1 # second
_STRING_TYPE = 1
_JSON_TYPE = 2
class MemcachedModelCache(DataModelCache):
  """ Implementation of the data model cache backed by a memcached.

      The memcached client is created lazily on first use. Failures to create the client, to
      read from the cache or to write to it are logged and otherwise ignored: the loader's
      result is still returned, just without being served from (or stored in) the cache.
  """
  def __init__(self, endpoint, timeout=_DEFAULT_MEMCACHE_TIMEOUT,
               connect_timeout=_DEFAULT_MEMCACHE_CONNECT_TIMEOUT):
    """ `endpoint` is handed to the memcached client as-is. `timeout` and `connect_timeout`
        are in seconds; passing None for either selects the module default, so that a caller
        forwarding an unset configuration value does not end up with an unbounded timeout.
    """
    self.endpoint = endpoint
    self.timeout = _DEFAULT_MEMCACHE_TIMEOUT if timeout is None else timeout
    self.connect_timeout = (_DEFAULT_MEMCACHE_CONNECT_TIMEOUT if connect_timeout is None
                            else connect_timeout)
    self.client = None

  @staticmethod
  def _serialize_json(key, value):
    """ Client serializer: plain strings are stored untouched, everything else as JSON. The
        returned flag records which encoding was used.
    """
    if isinstance(value, str):
      return value, _STRING_TYPE

    return json.dumps(value), _JSON_TYPE

  @staticmethod
  def _deserialize_json(key, value, flags):
    """ Client deserializer: the inverse of `_serialize_json`, driven by the stored flag. """
    if flags == _STRING_TYPE:
      return value

    if flags == _JSON_TYPE:
      return json.loads(value)

    raise Exception("Unknown flags for value: {0}".format(flags))

  def _get_client(self):
    """ Returns the memcached client, creating and remembering it on first call. Returns None
        (after logging) if the client could not be created; creation is retried next call.
    """
    client = self.client
    if client is not None:
      return client

    try:
      self.client = Client(self.endpoint, no_delay=True, timeout=self.timeout,
                           connect_timeout=self.connect_timeout,
                           key_prefix='data_model_cache__',
                           serializer=self._serialize_json,
                           deserializer=self._deserialize_json,
                           ignore_exc=True)
      return self.client
    except Exception:
      logger.exception('Got exception when creating memcached client to %s', self.endpoint)
      return None

  def retrieve(self, cache_key, loader, should_cache=is_not_none):
    """ Returns the cached value for `cache_key.key` if memcached has one; otherwise calls
        `loader`, stores its result when a client is available and `should_cache(result)` is
        truthy, and returns that result. Exceptions raised by `loader` propagate to the caller.
    """
    # Passed as the lookup default so a miss can be told apart from a cached value. The
    # comparison is by equality, so a cached value equal to `[None]` reads as a miss.
    not_found = [None]
    client = self._get_client()
    if client is not None:
      logger.debug('Checking cache for key %s', cache_key.key)
      try:
        result = client.get(cache_key.key, default=not_found)
        if result != not_found:
          logger.debug('Found result in cache for key %s: %s', cache_key.key, result)
          return result
      except Exception:
        logger.exception('Got exception when trying to retrieve key %s', cache_key.key)

    logger.debug('Found no result in cache for key %s; calling loader', cache_key.key)
    result = loader()
    logger.debug('Got loaded result for key %s: %s', cache_key.key, result)

    if client is not None and should_cache(result):
      try:
        # Arguments follow the order of the placeholders: key, expiration, then the result.
        logger.debug('Caching loaded result for key %s with expiration %s: %s', cache_key.key,
                     cache_key.expiration, result)
        expires = convert_to_timedelta(cache_key.expiration) if cache_key.expiration else None
        # memcached expects an integer expiry; 0 means the entry never expires.
        expire_seconds = int(expires.total_seconds()) if expires else 0
        client.set(cache_key.key, result, expire=expire_seconds)
        logger.debug('Cached loaded result for key %s with expiration %s: %s', cache_key.key,
                     cache_key.expiration, result)
      except Exception:
        logger.exception('Got exception when trying to set key %s to %s', cache_key.key, result)
    else:
      logger.debug('Not caching loaded result for key %s: %s', cache_key.key, result)

    return result

View file

@ -1,8 +1,21 @@
import pytest
from data.cache import InMemoryDataModelCache, NoopDataModelCache
from mock import patch
from data.cache import InMemoryDataModelCache, NoopDataModelCache, MemcachedModelCache
from data.cache.cache_key import CacheKey
class MockClient(object):
  """ Dictionary-backed stand-in for the memcached client used by the cache tests. """

  def __init__(self, server, **kwargs):
    # The server address and all client options are accepted but ignored.
    self.data = dict()

  def get(self, key, default=None):
    """ Returns the stored value for `key`, or `default` when nothing is stored. """
    if key in self.data:
      return self.data[key]

    return default

  def set(self, key, value, expire=None):
    """ Stores `value` under `key`; `expire` is accepted but not honoured. """
    self.data.update({key: value})
@pytest.mark.parametrize('cache_type', [
(NoopDataModelCache),
(InMemoryDataModelCache),
@ -12,5 +25,32 @@ def test_caching(cache_type):
cache = cache_type()
# Perform two retrievals, and make sure both return.
assert cache.retrieve(key, lambda: 1234) == 1234
assert cache.retrieve(key, lambda: 1234) == 1234
assert cache.retrieve(key, lambda: {'a': 1234}) == {'a': 1234}
assert cache.retrieve(key, lambda: {'a': 1234}) == {'a': 1234}
def test_memcache():
  """ Retrieving the same key twice through the memcached-backed cache (with the client
      swapped for MockClient) yields the loader's value both times.
  """
  expected = {'a': 1234}
  cache_key = CacheKey('foo', '60m')

  with patch('data.cache.impl.Client', MockClient):
    memcache = MemcachedModelCache(('127.0.0.1', '-1'))
    for _ in range(2):
      assert memcache.retrieve(cache_key, lambda: {'a': 1234}) == expected
def test_memcache_should_cache():
  """ The `should_cache` predicate decides whether a loaded value is written to the
      (mocked) memcached client.
  """
  cache_key = CacheKey('foo', None)

  def not_1234(loaded):
    # Only values whose `a` entry differs from 1234 are eligible for caching.
    return loaded['a'] != 1234

  with patch('data.cache.impl.Client', MockClient):
    memcache = MemcachedModelCache(('127.0.0.1', '-1'))

    rejected = memcache.retrieve(cache_key, lambda: {'a': 1234}, should_cache=not_1234)
    assert rejected == {'a': 1234}

    # Ensure not cached since it was `1234`.
    assert memcache._get_client().get(cache_key.key) is None

    # Ensure cached.
    accepted = memcache.retrieve(cache_key, lambda: {'a': 2345}, should_cache=not_1234)
    assert accepted == {'a': 2345}
    assert memcache._get_client().get(cache_key.key) is not None

    repeated = memcache.retrieve(cache_key, lambda: {'a': 2345}, should_cache=not_1234)
    assert repeated == {'a': 2345}

View file

@ -44,6 +44,7 @@ RUN apt-get update && apt-get upgrade -y \
libpq5 \
libsasl2-dev \
libsasl2-modules \
memcached \
monit \
nginx \
nodejs \
@ -54,7 +55,7 @@ RUN apt-get update && apt-get upgrade -y \
python-pip \
python-virtualenv \
yarn=0.22.0-1 \
w3m # 13DEC2017
w3m # 19DEC2017
# Install cfssl
RUN mkdir /gocode

View file

@ -78,3 +78,4 @@ xhtml2pdf
recaptcha2
mockredispy
yapf
pymemcache

View file

@ -108,6 +108,7 @@ pyjwkest==1.4.0
PyJWT==1.5.3
PyMySQL==0.6.7
pyOpenSSL==17.5.0
pymemcache==1.4.3
pyparsing==2.2.0
PyPDF2==1.26.0
python-dateutil==2.6.1

View file

@ -102,3 +102,7 @@ class TestConfig(DefaultConfig):
TAG_EXPIRATION_OPTIONS = ['0s', '1s', '1d', '1w', '2w', '4w']
DEFAULT_NAMESPACE_MAXIMUM_BUILD_COUNT = None
DATA_MODEL_CACHE_CONFIG = {
'engine': 'inmemory',
}

View file

@ -80,6 +80,8 @@ INTERNAL_ONLY_PROPERTIES = {
'SECURITY_SCANNER_READONLY_FAILOVER_ENDPOINTS',
'SECURITY_SCANNER_API_VERSION',
'DATA_MODEL_CACHE_CONFIG',
# TODO: move this into the schema once we support signing in QE.
'FEATURE_SIGNING',
'TUF_SERVER',