Switch Quay to using an in-container memcached for data model caching

This commit is contained in:
Joseph Schorr 2017-12-19 17:13:37 -05:00
parent 5cecc54bd5
commit ab0172d2fd
14 changed files with 246 additions and 65 deletions

View file

@ -1,62 +1,23 @@
import logging
from data.cache.impl import NoopDataModelCache, InMemoryDataModelCache, MemcachedModelCache
from datetime import datetime
def get_model_cache(config):
""" Returns a data model cache matching the given configuration. """
cache_config = config.get('DATA_MODEL_CACHE_CONFIG', {})
engine = cache_config.get('engine', 'noop')
from abc import ABCMeta, abstractmethod
from six import add_metaclass
if engine == 'noop':
return NoopDataModelCache()
from util.expiresdict import ExpiresDict
from util.timedeltastring import convert_to_timedelta
if engine == 'inmemory':
return InMemoryDataModelCache()
logger = logging.getLogger(__name__)
if engine == 'memcached':
endpoint = cache_config.get('endpoint', None)
if endpoint is None:
raise Exception('Missing `endpoint` for memcached model cache configuration')
def is_not_none(value):
return value is not None
timeout = cache_config.get('timeout')
connect_timeout = cache_config.get('connect_timeout')
return MemcachedModelCache(endpoint, timeout=timeout, connect_timeout=connect_timeout)
@add_metaclass(ABCMeta)
class DataModelCache(object):
""" Defines an interface for cache storing and returning tuple data model objects. """
@abstractmethod
def retrieve(self, cache_key, loader, should_cache=is_not_none):
""" Checks the cache for the specified cache key and returns the value found (if any). If none
found, the loader is called to get a result and populate the cache.
"""
pass
class NoopDataModelCache(DataModelCache):
""" Implementation of the data model cache which does nothing. """
def retrieve(self, cache_key, loader, should_cache=is_not_none):
return loader()
class InMemoryDataModelCache(DataModelCache):
""" Implementation of the data model cache backed by an in-memory dictionary. """
def __init__(self):
self.cache = ExpiresDict(rebuilder=lambda: {})
def retrieve(self, cache_key, loader, should_cache=is_not_none):
not_found = [None]
logger.debug('Checking cache for key %s', cache_key.key)
result = self.cache.get(cache_key.key, default_value=not_found)
if result != not_found:
logger.debug('Found result in cache for key %s: %s', cache_key.key, result)
return result
logger.debug('Found no result in cache for key %s; calling loader', cache_key.key)
result = loader()
logger.debug('Got loaded result for key %s: %s', cache_key.key, result)
if should_cache(result):
logger.debug('Caching loaded result for key %s with expiration %s: %s', cache_key.key,
result, cache_key.expiration)
expires = convert_to_timedelta(cache_key.expiration) + datetime.now()
self.cache.set(cache_key.key, result, expires=expires)
logger.debug('Cached loaded result for key %s with expiration %s: %s', cache_key.key,
result, cache_key.expiration)
else:
logger.debug('Not caching loaded result for key %s: %s', cache_key.key, result)
return result
raise Exception('Unknown model cache engine `%s`' % engine)

View file

@ -5,4 +5,4 @@ class CacheKey(namedtuple('CacheKey', ['key', 'expiration'])):
pass
def for_repository_blob(namespace_name, repo_name, digest):
return CacheKey('repository_blob:%s:%s:%s' % (namespace_name, repo_name, digest), '60s')
return CacheKey('repository_blob__%s_%s_%s' % (namespace_name, repo_name, digest), '60s')

146
data/cache/impl.py vendored Normal file
View file

@ -0,0 +1,146 @@
import logging
import json
from datetime import datetime
from abc import ABCMeta, abstractmethod
from six import add_metaclass
from pymemcache.client.base import Client
from util.expiresdict import ExpiresDict
from util.timedeltastring import convert_to_timedelta
logger = logging.getLogger(__name__)
def is_not_none(value):
    """ Default `should_cache` predicate: True for every value except `None`. """
    return not (value is None)
@add_metaclass(ABCMeta)
class DataModelCache(object):
    """ Interface for caches that store and return tuple data model objects. """

    @abstractmethod
    def retrieve(self, cache_key, loader, should_cache=is_not_none):
        """ Looks up `cache_key` in the cache and returns the stored value when one is present.
            Otherwise `loader` is invoked to produce the result, which is then used to populate
            the cache.
        """
class NoopDataModelCache(DataModelCache):
    """ Data model cache that stores nothing: every retrieval goes straight to the loader. """

    def retrieve(self, cache_key, loader, should_cache=is_not_none):
        # `cache_key` and `should_cache` are accepted for interface compatibility only.
        loaded = loader()
        return loaded
class InMemoryDataModelCache(DataModelCache):
    """ Implementation of the data model cache backed by an in-memory dictionary. """

    def __init__(self):
        self.cache = ExpiresDict(rebuilder=lambda: {})

    def retrieve(self, cache_key, loader, should_cache=is_not_none):
        """ Returns the value cached under `cache_key.key`, calling `loader` on a miss.

            On a miss the loaded result is stored under `cache_key.key`, expiring at
            now + `cache_key.expiration`, but only if `should_cache(result)` is truthy.
            The loaded result is returned either way.
        """
        # Sentinel handed to the lookup as its default so that a miss can be told apart
        # from a stored value.
        # NOTE(review): the comparison is by equality, so a cached value equal to `[None]`
        # is indistinguishable from a miss and will be reloaded.
        not_found = [None]

        logger.debug('Checking cache for key %s', cache_key.key)
        result = self.cache.get(cache_key.key, default_value=not_found)
        if result != not_found:
            logger.debug('Found result in cache for key %s: %s', cache_key.key, result)
            return result

        logger.debug('Found no result in cache for key %s; calling loader', cache_key.key)
        result = loader()
        logger.debug('Got loaded result for key %s: %s', cache_key.key, result)

        if should_cache(result):
            # The log arguments follow the order of the placeholders: key, expiration, result.
            logger.debug('Caching loaded result for key %s with expiration %s: %s', cache_key.key,
                         cache_key.expiration, result)
            expires = convert_to_timedelta(cache_key.expiration) + datetime.now()
            self.cache.set(cache_key.key, result, expires=expires)
            logger.debug('Cached loaded result for key %s with expiration %s: %s', cache_key.key,
                         cache_key.expiration, result)
        else:
            logger.debug('Not caching loaded result for key %s: %s', cache_key.key, result)

        return result
# Default values for the `timeout` and `connect_timeout` arguments of MemcachedModelCache,
# which forwards them to the memcached client.
_DEFAULT_MEMCACHE_TIMEOUT = 1 # second
_DEFAULT_MEMCACHE_CONNECT_TIMEOUT = 1 # second

# Flag values returned by the client serializer next to each stored value; the deserializer
# uses them to decide whether the value is returned as-is or decoded from JSON.
_STRING_TYPE = 1
_JSON_TYPE = 2
class MemcachedModelCache(DataModelCache):
    """ Implementation of the data model cache backed by a memcached.

        The memcached client is created lazily on first use. Failures while creating or
        talking to the client are logged and otherwise ignored, so a retrieval always falls
        back to calling the loader.
    """

    def __init__(self, endpoint, timeout=_DEFAULT_MEMCACHE_TIMEOUT,
                 connect_timeout=_DEFAULT_MEMCACHE_CONNECT_TIMEOUT):
        """ Records the connection settings; no client is created until one is needed.

            NOTE(review): a `timeout` or `connect_timeout` of `None` is forwarded to the client
            unchanged rather than replaced by the defaults -- confirm that callers passing
            unset configuration values intend this.
        """
        self.endpoint = endpoint
        self.timeout = timeout
        self.connect_timeout = connect_timeout
        self.client = None

    def _get_client(self):
        """ Returns the memcached client, creating it on first call. Returns `None` if the
            client could not be created.
        """
        client = self.client
        if client is not None:
            return client

        try:
            # Copied from the doc comment for Client.
            def serialize_json(key, value):
                # Plain strings are stored untouched; everything else is JSON-encoded.
                if isinstance(value, str):
                    return value, _STRING_TYPE

                return json.dumps(value), _JSON_TYPE

            def deserialize_json(key, value, flags):
                if flags == _STRING_TYPE:
                    return value

                if flags == _JSON_TYPE:
                    return json.loads(value)

                # `{0}` refers to the single positional argument; `{1}` raised IndexError
                # instead of reporting the unexpected flags.
                raise Exception("Unknown flags for value: {0}".format(flags))

            self.client = Client(self.endpoint, no_delay=True, timeout=self.timeout,
                                 connect_timeout=self.connect_timeout,
                                 key_prefix='data_model_cache__',
                                 serializer=serialize_json,
                                 deserializer=deserialize_json,
                                 ignore_exc=True)
            return self.client
        except Exception:
            # Best effort: callers treat a missing client as "caching unavailable".
            logger.exception('Got exception when creating memcached client to %s', self.endpoint)
            return None

    def retrieve(self, cache_key, loader, should_cache=is_not_none):
        """ Returns the value cached under `cache_key.key`, calling `loader` on a miss.

            On a miss (or when no client is available, or the lookup raises) the loader is
            called. Its result is written to memcached only if a client is available and
            `should_cache(result)` is truthy; errors during the write are logged and ignored.
            The loaded result is returned either way.
        """
        # Sentinel handed to the client as its default so that a miss can be told apart
        # from a stored value.
        # NOTE(review): the comparison is by equality, so a cached value equal to `[None]`
        # is indistinguishable from a miss and will be reloaded.
        not_found = [None]
        client = self._get_client()
        if client is not None:
            logger.debug('Checking cache for key %s', cache_key.key)
            try:
                result = client.get(cache_key.key, default=not_found)
                if result != not_found:
                    logger.debug('Found result in cache for key %s: %s', cache_key.key, result)
                    return result
            except Exception:
                logger.exception('Got exception when trying to retrieve key %s', cache_key.key)

        logger.debug('Found no result in cache for key %s; calling loader', cache_key.key)
        result = loader()
        logger.debug('Got loaded result for key %s: %s', cache_key.key, result)

        if client is not None and should_cache(result):
            try:
                # The log arguments follow the order of the placeholders: key, expiration, result.
                logger.debug('Caching loaded result for key %s with expiration %s: %s',
                             cache_key.key, cache_key.expiration, result)
                expires = convert_to_timedelta(cache_key.expiration) if cache_key.expiration else None
                # memcached uses an expiry of 0 to mean "never expires"; the client expects
                # an integer here, so a missing expiration is mapped to 0 rather than None.
                expire_seconds = int(expires.total_seconds()) if expires else 0
                client.set(cache_key.key, result, expire=expire_seconds)
                logger.debug('Cached loaded result for key %s with expiration %s: %s',
                             cache_key.key, cache_key.expiration, result)
            except Exception:
                logger.exception('Got exception when trying to set key %s to %s', cache_key.key, result)
        else:
            logger.debug('Not caching loaded result for key %s: %s', cache_key.key, result)

        return result

View file

@ -1,8 +1,21 @@
import pytest
from data.cache import InMemoryDataModelCache, NoopDataModelCache
from mock import patch
from data.cache import InMemoryDataModelCache, NoopDataModelCache, MemcachedModelCache
from data.cache.cache_key import CacheKey
class MockClient(object):
    """ Dictionary-backed stand-in for the memcached client, for use in tests.

        The server address, any extra constructor keyword arguments, and the `expire`
        argument of `set` are accepted but ignored.
    """

    def __init__(self, server, **kwargs):
        self.data = {}

    def get(self, key, default=None):
        """ Returns the value stored under `key`, or `default` when nothing is stored. """
        try:
            return self.data[key]
        except KeyError:
            return default

    def set(self, key, value, expire=None):
        """ Stores `value` under `key`, replacing any previous value. """
        self.data.update({key: value})
@pytest.mark.parametrize('cache_type', [
(NoopDataModelCache),
(InMemoryDataModelCache),
@ -12,5 +25,32 @@ def test_caching(cache_type):
cache = cache_type()
# Perform two retrievals, and make sure both return.
assert cache.retrieve(key, lambda: 1234) == 1234
assert cache.retrieve(key, lambda: 1234) == 1234
assert cache.retrieve(key, lambda: {'a': 1234}) == {'a': 1234}
assert cache.retrieve(key, lambda: {'a': 1234}) == {'a': 1234}
def test_memcache():
    """ Retrieving the same key twice through the memcached-backed cache yields the loaded
        value both times.
    """
    expected = {'a': 1234}
    key = CacheKey('foo', '60m')

    def loader():
        return {'a': 1234}

    # Swap the real memcached client for the dictionary-backed mock.
    with patch('data.cache.impl.Client', MockClient):
        cache = MemcachedModelCache(('127.0.0.1', '-1'))
        for _ in range(2):
            assert cache.retrieve(key, loader) == expected
def test_memcache_should_cache():
    """ The `should_cache` predicate controls whether a loaded value is written to the cache. """
    key = CacheKey('foo', None)
    rejected = {'a': 1234}
    accepted = {'a': 2345}

    def is_cacheable(value):
        return not (value['a'] == 1234)

    # Swap the real memcached client for the dictionary-backed mock.
    with patch('data.cache.impl.Client', MockClient):
        cache = MemcachedModelCache(('127.0.0.1', '-1'))

        # The predicate rejects `1234`, so the value is returned but nothing is stored.
        assert cache.retrieve(key, lambda: {'a': 1234}, should_cache=is_cacheable) == rejected
        assert cache._get_client().get(key.key) is None

        # Any other value passes the predicate and is stored.
        assert cache.retrieve(key, lambda: {'a': 2345}, should_cache=is_cacheable) == accepted
        assert cache._get_client().get(key.key) is not None
        assert cache.retrieve(key, lambda: {'a': 2345}, should_cache=is_cacheable) == accepted