Switch Quay to using an in-container memcached for data model caching

Joseph Schorr 2017-12-19 17:13:37 -05:00
parent 5cecc54bd5
commit ab0172d2fd
14 changed files with 246 additions and 65 deletions
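At a high level, the commit replaces the hard-coded InMemoryDataModelCache in app.py with a config-driven factory (get_model_cache), runs memcached as a new in-container service, and points the default configuration at it. For orientation, a minimal sketch of the consumption pattern, assuming Quay's modules are importable; the load_blob loader here is hypothetical:

from data.cache import get_model_cache
from data.cache.cache_key import CacheKey

model_cache = get_model_cache({'DATA_MODEL_CACHE_CONFIG': {'engine': 'inmemory'}})

def load_blob():
  # Hypothetical stand-in for a real database lookup.
  return {'digest': 'sha256:abcd', 'size': 1234}

key = CacheKey('repository_blob__devtable_simple_sha256:abcd', '60s')
model_cache.retrieve(key, load_blob)  # miss: the loader runs and the result is cached for 60s
model_cache.retrieve(key, load_blob)  # hit: served from the cache without calling the loader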

Dockerfile

@@ -2,6 +2,8 @@
 FROM quay.io/quay/quay-base:latest
+RUN adduser memcached --disabled-login --system
 WORKDIR $QUAYDIR
 COPY requirements.txt requirements-tests.txt ./

app.py

@@ -22,7 +22,7 @@ from data import model
 from data.archivedlogs import LogArchive
 from data.billing import Billing
 from data.buildlogs import BuildLogs
-from data.cache import InMemoryDataModelCache
+from data.cache import get_model_cache
 from data.model.user import LoginWrappedDBUser
 from data.queue import WorkQueue, BuildMetricQueueReporter
 from data.userevent import UserEventsBuilderModule
@@ -178,9 +178,7 @@ Principal(app, use_sessions=False)
 tf = app.config['DB_TRANSACTION_FACTORY']

-# TODO(jschorr): make this configurable
-model_cache = InMemoryDataModelCache()
+model_cache = get_model_cache(app.config)

 avatar = Avatar(app)
 login_manager = LoginManager(app)
 mail = Mail(app)


@@ -0,0 +1,7 @@
#!/bin/sh

# Ensure dependencies start before the logger
sv check syslog-ng > /dev/null || exit 1

# Start the logger
exec logger -i -t memcached


@@ -0,0 +1,12 @@
#! /bin/bash

echo 'Starting memcached'

if [ "$DEBUGLOG" == "true" ]
then
    memcached -u memcached -m 64 -vv -l 127.0.0.1 -p 18080
else
    memcached -u memcached -m 64 -l 127.0.0.1 -p 18080
fi

echo 'memcached exited'
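memcached binds only to 127.0.0.1:18080 with a 64 MB memory cap, so it is reachable solely from within the container. A hedged smoke test using pymemcache (the client library this commit adds to the requirements) might look like:

from pymemcache.client.base import Client

# Talks to the in-container memcached started by the run script above.
client = Client(('127.0.0.1', 18080), connect_timeout=1, timeout=1)
client.set('smoke_test', 'ok', expire=10)
assert client.get('smoke_test') == b'ok'  # the raw client returns bytes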

config.py

@@ -513,3 +513,9 @@ class DefaultConfig(ImmutableConfig):
   # For Billing Support Only: The number of allowed builds on a namespace that has been billed
   # successfully.
   BILLED_NAMESPACE_MAXIMUM_BUILD_COUNT = None
+
+  # Configuration for the data model cache.
+  DATA_MODEL_CACHE_CONFIG = {
+    'engine': 'memcached',
+    'endpoint': ('127.0.0.1', 18080),
+  }
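Per get_model_cache below, the supported engine values are 'noop', 'inmemory', and 'memcached'; for the memcached engine, optional 'timeout' and 'connect_timeout' keys are also read. For example, a deployment could override the default with:

DATA_MODEL_CACHE_CONFIG = {
  'engine': 'memcached',
  'endpoint': ('127.0.0.1', 18080),
  'timeout': 1,           # optional socket timeout, in seconds
  'connect_timeout': 1,   # optional connect timeout, in seconds
}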

data/cache/__init__.py

@@ -1,62 +1,23 @@
-import logging
-
-from datetime import datetime
-
-from abc import ABCMeta, abstractmethod
-from six import add_metaclass
-
-from util.expiresdict import ExpiresDict
-from util.timedeltastring import convert_to_timedelta
-
-logger = logging.getLogger(__name__)
-
-
-def is_not_none(value):
-  return value is not None
-
-
-@add_metaclass(ABCMeta)
-class DataModelCache(object):
-  """ Defines an interface for cache storing and returning tuple data model objects. """
-
-  @abstractmethod
-  def retrieve(self, cache_key, loader, should_cache=is_not_none):
-    """ Checks the cache for the specified cache key and returns the value found (if any). If none
-        found, the loader is called to get a result and populate the cache.
-    """
-    pass
-
-
-class NoopDataModelCache(DataModelCache):
-  """ Implementation of the data model cache which does nothing. """
-
-  def retrieve(self, cache_key, loader, should_cache=is_not_none):
-    return loader()
-
-
-class InMemoryDataModelCache(DataModelCache):
-  """ Implementation of the data model cache backed by an in-memory dictionary. """
-  def __init__(self):
-    self.cache = ExpiresDict(rebuilder=lambda: {})
-
-  def retrieve(self, cache_key, loader, should_cache=is_not_none):
-    not_found = [None]
-
-    logger.debug('Checking cache for key %s', cache_key.key)
-    result = self.cache.get(cache_key.key, default_value=not_found)
-    if result != not_found:
-      logger.debug('Found result in cache for key %s: %s', cache_key.key, result)
-      return result
-
-    logger.debug('Found no result in cache for key %s; calling loader', cache_key.key)
-    result = loader()
-    logger.debug('Got loaded result for key %s: %s', cache_key.key, result)
-    if should_cache(result):
-      logger.debug('Caching loaded result for key %s with expiration %s: %s', cache_key.key,
-                   result, cache_key.expiration)
-      expires = convert_to_timedelta(cache_key.expiration) + datetime.now()
-      self.cache.set(cache_key.key, result, expires=expires)
-      logger.debug('Cached loaded result for key %s with expiration %s: %s', cache_key.key,
-                   result, cache_key.expiration)
-    else:
-      logger.debug('Not caching loaded result for key %s: %s', cache_key.key, result)
-
-    return result
+from data.cache.impl import NoopDataModelCache, InMemoryDataModelCache, MemcachedModelCache
+
+
+def get_model_cache(config):
+  """ Returns a data model cache matching the given configuration. """
+  cache_config = config.get('DATA_MODEL_CACHE_CONFIG', {})
+  engine = cache_config.get('engine', 'noop')
+
+  if engine == 'noop':
+    return NoopDataModelCache()
+
+  if engine == 'inmemory':
+    return InMemoryDataModelCache()
+
+  if engine == 'memcached':
+    endpoint = cache_config.get('endpoint', None)
+    if endpoint is None:
+      raise Exception('Missing `endpoint` for memcached model cache configuration')
+
+    timeout = cache_config.get('timeout')
+    connect_timeout = cache_config.get('connect_timeout')
+    return MemcachedModelCache(endpoint, timeout=timeout, connect_timeout=connect_timeout)
+
+  raise Exception('Unknown model cache engine `%s`' % engine)
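Note the factory's fallback behavior: with no DATA_MODEL_CACHE_CONFIG present it returns the no-op cache, so callers never need a None check. A small illustration, assuming the Quay modules are importable:

from data.cache import get_model_cache
from data.cache.impl import NoopDataModelCache, MemcachedModelCache

assert isinstance(get_model_cache({}), NoopDataModelCache)

cache = get_model_cache({'DATA_MODEL_CACHE_CONFIG': {
  'engine': 'memcached',
  'endpoint': ('127.0.0.1', 18080),
}})
assert isinstance(cache, MemcachedModelCache)  # no connection yet; the client is built lazily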

data/cache/cache_key.py

@@ -5,4 +5,4 @@ class CacheKey(namedtuple('CacheKey', ['key', 'expiration'])):
   pass

 def for_repository_blob(namespace_name, repo_name, digest):
-  return CacheKey('repository_blob:%s:%s:%s' % (namespace_name, repo_name, digest), '60s')
+  return CacheKey('repository_blob__%s_%s_%s' % (namespace_name, repo_name, digest), '60s')
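The separator in blob cache keys changes from colons to underscores (the digest itself still contains a colon), and the memcached client below additionally prefixes every stored key with data_model_cache__. Concretely:

from data.cache.cache_key import for_repository_blob

key = for_repository_blob('devtable', 'simple', 'sha256:abcd')
assert key.key == 'repository_blob__devtable_simple_sha256:abcd'
assert key.expiration == '60s'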

data/cache/impl.py

@@ -0,0 +1,146 @@
import logging
import json

from datetime import datetime

from abc import ABCMeta, abstractmethod
from six import add_metaclass
from pymemcache.client.base import Client

from util.expiresdict import ExpiresDict
from util.timedeltastring import convert_to_timedelta

logger = logging.getLogger(__name__)


def is_not_none(value):
  return value is not None


@add_metaclass(ABCMeta)
class DataModelCache(object):
  """ Defines an interface for cache storing and returning tuple data model objects. """

  @abstractmethod
  def retrieve(self, cache_key, loader, should_cache=is_not_none):
    """ Checks the cache for the specified cache key and returns the value found (if any). If none
        found, the loader is called to get a result and populate the cache.
    """
    pass


class NoopDataModelCache(DataModelCache):
  """ Implementation of the data model cache which does nothing. """

  def retrieve(self, cache_key, loader, should_cache=is_not_none):
    return loader()


class InMemoryDataModelCache(DataModelCache):
  """ Implementation of the data model cache backed by an in-memory dictionary. """
  def __init__(self):
    self.cache = ExpiresDict(rebuilder=lambda: {})

  def retrieve(self, cache_key, loader, should_cache=is_not_none):
    not_found = [None]

    logger.debug('Checking cache for key %s', cache_key.key)
    result = self.cache.get(cache_key.key, default_value=not_found)
    if result != not_found:
      logger.debug('Found result in cache for key %s: %s', cache_key.key, result)
      return result

    logger.debug('Found no result in cache for key %s; calling loader', cache_key.key)
    result = loader()
    logger.debug('Got loaded result for key %s: %s', cache_key.key, result)
    if should_cache(result):
      logger.debug('Caching loaded result for key %s with expiration %s: %s', cache_key.key,
                   result, cache_key.expiration)
      expires = convert_to_timedelta(cache_key.expiration) + datetime.now()
      self.cache.set(cache_key.key, result, expires=expires)
      logger.debug('Cached loaded result for key %s with expiration %s: %s', cache_key.key,
                   result, cache_key.expiration)
    else:
      logger.debug('Not caching loaded result for key %s: %s', cache_key.key, result)

    return result


_DEFAULT_MEMCACHE_TIMEOUT = 1  # second
_DEFAULT_MEMCACHE_CONNECT_TIMEOUT = 1  # second

_STRING_TYPE = 1
_JSON_TYPE = 2


class MemcachedModelCache(DataModelCache):
  """ Implementation of the data model cache backed by a memcached. """
  def __init__(self, endpoint, timeout=_DEFAULT_MEMCACHE_TIMEOUT,
               connect_timeout=_DEFAULT_MEMCACHE_CONNECT_TIMEOUT):
    self.endpoint = endpoint
    self.timeout = timeout
    self.connect_timeout = connect_timeout
    self.client = None

  def _get_client(self):
    client = self.client
    if client is not None:
      return client

    try:
      # Copied from the doc comment for Client.
      def serialize_json(key, value):
        if type(value) == str:
          return value, _STRING_TYPE

        return json.dumps(value), _JSON_TYPE

      def deserialize_json(key, value, flags):
        if flags == _STRING_TYPE:
          return value

        if flags == _JSON_TYPE:
          return json.loads(value)

        raise Exception("Unknown flags for value: {0}".format(flags))

      self.client = Client(self.endpoint, no_delay=True, timeout=self.timeout,
                           connect_timeout=self.connect_timeout,
                           key_prefix='data_model_cache__',
                           serializer=serialize_json,
                           deserializer=deserialize_json,
                           ignore_exc=True)
      return self.client
    except:
      logger.exception('Got exception when creating memcached client to %s', self.endpoint)
      return None

  def retrieve(self, cache_key, loader, should_cache=is_not_none):
    not_found = [None]

    client = self._get_client()
    if client is not None:
      logger.debug('Checking cache for key %s', cache_key.key)
      try:
        result = client.get(cache_key.key, default=not_found)
        if result != not_found:
          logger.debug('Found result in cache for key %s: %s', cache_key.key, result)
          return result
      except:
        logger.exception('Got exception when trying to retrieve key %s', cache_key.key)

    logger.debug('Found no result in cache for key %s; calling loader', cache_key.key)
    result = loader()
    logger.debug('Got loaded result for key %s: %s', cache_key.key, result)
    if client is not None and should_cache(result):
      try:
        logger.debug('Caching loaded result for key %s with expiration %s: %s', cache_key.key,
                     result, cache_key.expiration)
        expires = convert_to_timedelta(cache_key.expiration) if cache_key.expiration else None
        client.set(cache_key.key, result, expire=int(expires.total_seconds()) if expires else None)
        logger.debug('Cached loaded result for key %s with expiration %s: %s', cache_key.key,
                     result, cache_key.expiration)
      except:
        logger.exception('Got exception when trying to set key %s to %s', cache_key.key, result)
    else:
      logger.debug('Not caching loaded result for key %s: %s', cache_key.key, result)

    return result
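The memcached cache is deliberately failure-tolerant: client construction, get, and set are each wrapped in try/except, and ignore_exc=True turns memcached-level errors into misses, so an unreachable cache degrades to calling the loader on every request rather than failing it. A hedged illustration:

from data.cache.cache_key import CacheKey
from data.cache.impl import MemcachedModelCache

# Even with nothing listening on the endpoint, retrieve still returns the
# loaded value; the get/set failures are logged and swallowed.
cache = MemcachedModelCache(('127.0.0.1', 18080))
assert cache.retrieve(CacheKey('k', '60s'), lambda: {'a': 1}) == {'a': 1}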

data/cache/test/test_cache.py

@@ -1,8 +1,21 @@
 import pytest

+from mock import patch
+
-from data.cache import InMemoryDataModelCache, NoopDataModelCache
+from data.cache import InMemoryDataModelCache, NoopDataModelCache, MemcachedModelCache
 from data.cache.cache_key import CacheKey


+class MockClient(object):
+  def __init__(self, server, **kwargs):
+    self.data = {}
+
+  def get(self, key, default=None):
+    return self.data.get(key, default)
+
+  def set(self, key, value, expire=None):
+    self.data[key] = value
+
+
 @pytest.mark.parametrize('cache_type', [
   (NoopDataModelCache),
   (InMemoryDataModelCache),
@@ -12,5 +25,32 @@ def test_caching(cache_type):
   cache = cache_type()

   # Perform two retrievals, and make sure both return.
-  assert cache.retrieve(key, lambda: 1234) == 1234
-  assert cache.retrieve(key, lambda: 1234) == 1234
+  assert cache.retrieve(key, lambda: {'a': 1234}) == {'a': 1234}
+  assert cache.retrieve(key, lambda: {'a': 1234}) == {'a': 1234}
+
+
+def test_memcache():
+  key = CacheKey('foo', '60m')
+  with patch('data.cache.impl.Client', MockClient):
+    cache = MemcachedModelCache(('127.0.0.1', '-1'))
+    assert cache.retrieve(key, lambda: {'a': 1234}) == {'a': 1234}
+    assert cache.retrieve(key, lambda: {'a': 1234}) == {'a': 1234}
+
+
+def test_memcache_should_cache():
+  key = CacheKey('foo', None)
+
+  def sc(value):
+    return value['a'] != 1234
+
+  with patch('data.cache.impl.Client', MockClient):
+    cache = MemcachedModelCache(('127.0.0.1', '-1'))
+    assert cache.retrieve(key, lambda: {'a': 1234}, should_cache=sc) == {'a': 1234}
+
+    # Ensure not cached since it was `1234`.
+    assert cache._get_client().get(key.key) is None
+
+    # Ensure cached.
+    assert cache.retrieve(key, lambda: {'a': 2345}, should_cache=sc) == {'a': 2345}
+    assert cache._get_client().get(key.key) is not None
+    assert cache.retrieve(key, lambda: {'a': 2345}, should_cache=sc) == {'a': 2345}
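The tests patch data.cache.impl.Client rather than the cache instance because MemcachedModelCache only builds its client lazily inside _get_client. One more test in the same style (not part of this commit) would show that the default should_cache, is_not_none, keeps negative results out of the cache:

def test_memcache_not_caching_none():
  key = CacheKey('foo', '60m')
  with patch('data.cache.impl.Client', MockClient):
    cache = MemcachedModelCache(('127.0.0.1', '-1'))

    # None is returned to the caller but never stored, per is_not_none.
    assert cache.retrieve(key, lambda: None) is None
    assert cache._get_client().get(key.key) is None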


@@ -44,6 +44,7 @@ RUN apt-get update && apt-get upgrade -y \
     libpq5 \
     libsasl2-dev \
     libsasl2-modules \
+    memcached \
     monit \
     nginx \
     nodejs \
@@ -54,7 +55,7 @@ RUN apt-get update && apt-get upgrade -y \
     python-pip \
     python-virtualenv \
     yarn=0.22.0-1 \
-    w3m # 13DEC2017
+    w3m # 19DEC2017

 # Install cfssl
 RUN mkdir /gocode
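The date bump in the w3m comment (13DEC2017 to 19DEC2017) appears to be a cache-busting marker: changing any byte of the RUN instruction invalidates Docker's layer cache, forcing the apt-get layer to be rebuilt so the newly added memcached package is actually installed in the base image.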

requirements-nover.txt

@@ -78,3 +78,4 @@ xhtml2pdf
 recaptcha2
 mockredispy
 yapf
+pymemcache

requirements.txt

@@ -108,6 +108,7 @@ pyjwkest==1.4.0
 PyJWT==1.5.3
 PyMySQL==0.6.7
 pyOpenSSL==17.5.0
+pymemcache==1.4.3
 pyparsing==2.2.0
 PyPDF2==1.26.0
 python-dateutil==2.6.1

test/testconfig.py

@@ -102,3 +102,7 @@ class TestConfig(DefaultConfig):
   TAG_EXPIRATION_OPTIONS = ['0s', '1s', '1d', '1w', '2w', '4w']

   DEFAULT_NAMESPACE_MAXIMUM_BUILD_COUNT = None
+
+  DATA_MODEL_CACHE_CONFIG = {
+    'engine': 'inmemory',
+  }

util/config/schema.py

@@ -80,6 +80,8 @@ INTERNAL_ONLY_PROPERTIES = {
   'SECURITY_SCANNER_READONLY_FAILOVER_ENDPOINTS',
   'SECURITY_SCANNER_API_VERSION',

+  'DATA_MODEL_CACHE_CONFIG',
+
   # TODO: move this into the schema once we support signing in QE.
   'FEATURE_SIGNING',
   'TUF_SERVER',
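Listing DATA_MODEL_CACHE_CONFIG under INTERNAL_ONLY_PROPERTIES presumably keeps the new setting out of the user-facing config schema: the cache is wired internally to the in-container memcached rather than exposed as a tunable in the setup tool.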