From ab0172d2fdd368df96d889b1e9b14280c0c14d1a Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Tue, 19 Dec 2017 17:13:37 -0500 Subject: [PATCH] Switch Quay to using an in-container memcached for data model caching --- Dockerfile | 2 + app.py | 6 +- .../service/interactive/memcached/log/run | 7 + conf/init/service/interactive/memcached/run | 12 ++ config.py | 6 + data/cache/__init__.py | 73 ++------- data/cache/cache_key.py | 2 +- data/cache/impl.py | 146 ++++++++++++++++++ data/cache/test/test_cache.py | 46 +++++- quay-base.dockerfile | 3 +- requirements-nover.txt | 1 + requirements.txt | 1 + test/testconfig.py | 4 + util/config/schema.py | 2 + 14 files changed, 246 insertions(+), 65 deletions(-) create mode 100755 conf/init/service/interactive/memcached/log/run create mode 100755 conf/init/service/interactive/memcached/run create mode 100644 data/cache/impl.py diff --git a/Dockerfile b/Dockerfile index f319fe048..30504e666 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,6 +2,8 @@ FROM quay.io/quay/quay-base:latest +RUN adduser memcached --disabled-login --system + WORKDIR $QUAYDIR COPY requirements.txt requirements-tests.txt ./ diff --git a/app.py b/app.py index fdf908b9f..ca1ea2d92 100644 --- a/app.py +++ b/app.py @@ -22,7 +22,7 @@ from data import model from data.archivedlogs import LogArchive from data.billing import Billing from data.buildlogs import BuildLogs -from data.cache import InMemoryDataModelCache +from data.cache import get_model_cache from data.model.user import LoginWrappedDBUser from data.queue import WorkQueue, BuildMetricQueueReporter from data.userevent import UserEventsBuilderModule @@ -178,9 +178,7 @@ Principal(app, use_sessions=False) tf = app.config['DB_TRANSACTION_FACTORY'] -# TODO(jschorr): make this configurable -model_cache = InMemoryDataModelCache() - +model_cache = get_model_cache(app.config) avatar = Avatar(app) login_manager = LoginManager(app) mail = Mail(app) diff --git a/conf/init/service/interactive/memcached/log/run 
b/conf/init/service/interactive/memcached/log/run new file mode 100755 index 000000000..25afe47dd --- /dev/null +++ b/conf/init/service/interactive/memcached/log/run @@ -0,0 +1,7 @@ +#!/bin/sh + +# Ensure dependencies start before the logger +sv check syslog-ng > /dev/null || exit 1 + +# Start the logger +exec logger -i -t memcached diff --git a/conf/init/service/interactive/memcached/run b/conf/init/service/interactive/memcached/run new file mode 100755 index 000000000..720c8ad3e --- /dev/null +++ b/conf/init/service/interactive/memcached/run @@ -0,0 +1,12 @@ +#! /bin/bash + +echo 'Starting memcached' + +if [ "$DEBUGLOG" == "true" ] +then + memcached -u memcached -m 64 -vv -l 127.0.0.1 -p 18080 +else + memcached -u memcached -m 64 -l 127.0.0.1 -p 18080 +fi + +echo 'memcached exited' diff --git a/config.py b/config.py index e912b8ab1..4fcb7f336 100644 --- a/config.py +++ b/config.py @@ -513,3 +513,9 @@ class DefaultConfig(ImmutableConfig): # For Billing Support Only: The number of allowed builds on a namespace that has been billed # successfully. BILLED_NAMESPACE_MAXIMUM_BUILD_COUNT = None + + # Configuration for the data model cache. + DATA_MODEL_CACHE_CONFIG = { + 'engine': 'memcached', + 'endpoint': ('127.0.0.1', 18080), + } diff --git a/data/cache/__init__.py b/data/cache/__init__.py index 96dc0223d..a7c44dadd 100644 --- a/data/cache/__init__.py +++ b/data/cache/__init__.py @@ -1,62 +1,23 @@ -import logging +from data.cache.impl import NoopDataModelCache, InMemoryDataModelCache, MemcachedModelCache -from datetime import datetime +def get_model_cache(config): + """ Returns a data model cache matching the given configuration. 
""" + cache_config = config.get('DATA_MODEL_CACHE_CONFIG', {}) + engine = cache_config.get('engine', 'noop') -from abc import ABCMeta, abstractmethod -from six import add_metaclass + if engine == 'noop': + return NoopDataModelCache() -from util.expiresdict import ExpiresDict -from util.timedeltastring import convert_to_timedelta + if engine == 'inmemory': + return InMemoryDataModelCache() -logger = logging.getLogger(__name__) + if engine == 'memcached': + endpoint = cache_config.get('endpoint', None) + if endpoint is None: + raise Exception('Missing `endpoint` for memcached model cache configuration') -def is_not_none(value): - return value is not None + timeout = cache_config.get('timeout') + connect_timeout = cache_config.get('connect_timeout') + return MemcachedModelCache(endpoint, timeout=timeout, connect_timeout=connect_timeout) - -@add_metaclass(ABCMeta) -class DataModelCache(object): - """ Defines an interface for cache storing and returning tuple data model objects. """ - - @abstractmethod - def retrieve(self, cache_key, loader, should_cache=is_not_none): - """ Checks the cache for the specified cache key and returns the value found (if any). If none - found, the loader is called to get a result and populate the cache. - """ - pass - - -class NoopDataModelCache(DataModelCache): - """ Implementation of the data model cache which does nothing. """ - - def retrieve(self, cache_key, loader, should_cache=is_not_none): - return loader() - - -class InMemoryDataModelCache(DataModelCache): - """ Implementation of the data model cache backed by an in-memory dictionary. 
""" - def __init__(self): - self.cache = ExpiresDict(rebuilder=lambda: {}) - - def retrieve(self, cache_key, loader, should_cache=is_not_none): - not_found = [None] - logger.debug('Checking cache for key %s', cache_key.key) - result = self.cache.get(cache_key.key, default_value=not_found) - if result != not_found: - logger.debug('Found result in cache for key %s: %s', cache_key.key, result) - return result - - logger.debug('Found no result in cache for key %s; calling loader', cache_key.key) - result = loader() - logger.debug('Got loaded result for key %s: %s', cache_key.key, result) - if should_cache(result): - logger.debug('Caching loaded result for key %s with expiration %s: %s', cache_key.key, - result, cache_key.expiration) - expires = convert_to_timedelta(cache_key.expiration) + datetime.now() - self.cache.set(cache_key.key, result, expires=expires) - logger.debug('Cached loaded result for key %s with expiration %s: %s', cache_key.key, - result, cache_key.expiration) - else: - logger.debug('Not caching loaded result for key %s: %s', cache_key.key, result) - - return result + raise Exception('Unknown model cache engine `%s`' % engine) diff --git a/data/cache/cache_key.py b/data/cache/cache_key.py index b0d4d7011..3b10558e5 100644 --- a/data/cache/cache_key.py +++ b/data/cache/cache_key.py @@ -5,4 +5,4 @@ class CacheKey(namedtuple('CacheKey', ['key', 'expiration'])): pass def for_repository_blob(namespace_name, repo_name, digest): - return CacheKey('repository_blob:%s:%s:%s' % (namespace_name, repo_name, digest), '60s') + return CacheKey('repository_blob__%s_%s_%s' % (namespace_name, repo_name, digest), '60s') diff --git a/data/cache/impl.py b/data/cache/impl.py new file mode 100644 index 000000000..4f4f12668 --- /dev/null +++ b/data/cache/impl.py @@ -0,0 +1,146 @@ +import logging +import json + +from datetime import datetime + +from abc import ABCMeta, abstractmethod +from six import add_metaclass + +from pymemcache.client.base import Client + +from 
util.expiresdict import ExpiresDict +from util.timedeltastring import convert_to_timedelta + +logger = logging.getLogger(__name__) + + +def is_not_none(value): + return value is not None + + +@add_metaclass(ABCMeta) +class DataModelCache(object): + """ Defines an interface for cache storing and returning tuple data model objects. """ + + @abstractmethod + def retrieve(self, cache_key, loader, should_cache=is_not_none): + """ Checks the cache for the specified cache key and returns the value found (if any). If none + found, the loader is called to get a result and populate the cache. + """ + pass + + +class NoopDataModelCache(DataModelCache): + """ Implementation of the data model cache which does nothing. """ + + def retrieve(self, cache_key, loader, should_cache=is_not_none): + return loader() + + +class InMemoryDataModelCache(DataModelCache): + """ Implementation of the data model cache backed by an in-memory dictionary. """ + def __init__(self): + self.cache = ExpiresDict(rebuilder=lambda: {}) + + def retrieve(self, cache_key, loader, should_cache=is_not_none): + not_found = [None] + logger.debug('Checking cache for key %s', cache_key.key) + result = self.cache.get(cache_key.key, default_value=not_found) + if result != not_found: + logger.debug('Found result in cache for key %s: %s', cache_key.key, result) + return result + + logger.debug('Found no result in cache for key %s; calling loader', cache_key.key) + result = loader() + logger.debug('Got loaded result for key %s: %s', cache_key.key, result) + if should_cache(result): + logger.debug('Caching loaded result for key %s with expiration %s: %s', cache_key.key, + result, cache_key.expiration) + expires = convert_to_timedelta(cache_key.expiration) + datetime.now() + self.cache.set(cache_key.key, result, expires=expires) + logger.debug('Cached loaded result for key %s with expiration %s: %s', cache_key.key, + result, cache_key.expiration) + else: + logger.debug('Not caching loaded result for key %s: %s', 
cache_key.key, result) + + return result + + +_DEFAULT_MEMCACHE_TIMEOUT = 1 # second +_DEFAULT_MEMCACHE_CONNECT_TIMEOUT = 1 # second + +_STRING_TYPE = 1 +_JSON_TYPE = 2 + +class MemcachedModelCache(DataModelCache): + """ Implementation of the data model cache backed by a memcached. """ + def __init__(self, endpoint, timeout=_DEFAULT_MEMCACHE_TIMEOUT, + connect_timeout=_DEFAULT_MEMCACHE_CONNECT_TIMEOUT): + self.endpoint = endpoint + self.timeout = timeout + self.connect_timeout = connect_timeout + self.client = None + + def _get_client(self): + client = self.client + if client is not None: + return client + + try: + # Copied from the doc comment for Client. + def serialize_json(key, value): + if type(value) == str: + return value, _STRING_TYPE + + return json.dumps(value), _JSON_TYPE + + def deserialize_json(key, value, flags): + if flags == _STRING_TYPE: + return value + + if flags == _JSON_TYPE: + return json.loads(value) + + raise Exception("Unknown flags for value: {0}".format(flags)) + + self.client = Client(self.endpoint, no_delay=True, timeout=self.timeout, + connect_timeout=self.connect_timeout, + key_prefix='data_model_cache__', + serializer=serialize_json, + deserializer=deserialize_json, + ignore_exc=True) + return self.client + except: + logger.exception('Got exception when creating memcached client to %s', self.endpoint) + return None + + def retrieve(self, cache_key, loader, should_cache=is_not_none): + not_found = [None] + client = self._get_client() + if client is not None: + logger.debug('Checking cache for key %s', cache_key.key) + try: + result = client.get(cache_key.key, default=not_found) + if result != not_found: + logger.debug('Found result in cache for key %s: %s', cache_key.key, result) + return result + except: + logger.exception('Got exception when trying to retrieve key %s', cache_key.key) + + logger.debug('Found no result in cache for key %s; calling loader', cache_key.key) + result = loader() + logger.debug('Got loaded result for key 
%s: %s', cache_key.key, result) + if client is not None and should_cache(result): + try: + logger.debug('Caching loaded result for key %s with expiration %s: %s', cache_key.key, + result, cache_key.expiration) + expires = convert_to_timedelta(cache_key.expiration) if cache_key.expiration else None + client.set(cache_key.key, result, expire=int(expires.total_seconds()) if expires else None) + logger.debug('Cached loaded result for key %s with expiration %s: %s', cache_key.key, + result, cache_key.expiration) + except: + logger.exception('Got exception when trying to set key %s to %s', cache_key.key, result) + else: + logger.debug('Not caching loaded result for key %s: %s', cache_key.key, result) + + return result diff --git a/data/cache/test/test_cache.py b/data/cache/test/test_cache.py index d9e3708d0..bf0c4cccd 100644 --- a/data/cache/test/test_cache.py +++ b/data/cache/test/test_cache.py @@ -1,8 +1,21 @@ import pytest -from data.cache import InMemoryDataModelCache, NoopDataModelCache +from mock import patch + +from data.cache import InMemoryDataModelCache, NoopDataModelCache, MemcachedModelCache from data.cache.cache_key import CacheKey +class MockClient(object): + def __init__(self, server, **kwargs): + self.data = {} + + def get(self, key, default=None): + return self.data.get(key, default) + + def set(self, key, value, expire=None): + self.data[key] = value + + @pytest.mark.parametrize('cache_type', [ (NoopDataModelCache), (InMemoryDataModelCache), @@ -12,5 +25,32 @@ def test_caching(cache_type): cache = cache_type() # Perform two retrievals, and make sure both return. 
- assert cache.retrieve(key, lambda: 1234) == 1234 - assert cache.retrieve(key, lambda: 1234) == 1234 + assert cache.retrieve(key, lambda: {'a': 1234}) == {'a': 1234} + assert cache.retrieve(key, lambda: {'a': 1234}) == {'a': 1234} + + +def test_memcache(): + key = CacheKey('foo', '60m') + with patch('data.cache.impl.Client', MockClient): + cache = MemcachedModelCache(('127.0.0.1', '-1')) + assert cache.retrieve(key, lambda: {'a': 1234}) == {'a': 1234} + assert cache.retrieve(key, lambda: {'a': 1234}) == {'a': 1234} + + +def test_memcache_should_cache(): + key = CacheKey('foo', None) + + def sc(value): + return value['a'] != 1234 + + with patch('data.cache.impl.Client', MockClient): + cache = MemcachedModelCache(('127.0.0.1', '-1')) + assert cache.retrieve(key, lambda: {'a': 1234}, should_cache=sc) == {'a': 1234} + + # Ensure not cached since it was `1234`. + assert cache._get_client().get(key.key) is None + + # Ensure cached. + assert cache.retrieve(key, lambda: {'a': 2345}, should_cache=sc) == {'a': 2345} + assert cache._get_client().get(key.key) is not None + assert cache.retrieve(key, lambda: {'a': 2345}, should_cache=sc) == {'a': 2345} diff --git a/quay-base.dockerfile b/quay-base.dockerfile index 9967f7701..0df7fed6b 100644 --- a/quay-base.dockerfile +++ b/quay-base.dockerfile @@ -44,6 +44,7 @@ RUN apt-get update && apt-get upgrade -y \ libpq5 \ libsasl2-dev \ libsasl2-modules \ + memcached \ monit \ nginx \ nodejs \ @@ -54,7 +55,7 @@ RUN apt-get update && apt-get upgrade -y \ python-pip \ python-virtualenv \ yarn=0.22.0-1 \ - w3m # 13DEC2017 + w3m # 19DEC2017 # Install cfssl RUN mkdir /gocode diff --git a/requirements-nover.txt b/requirements-nover.txt index 5226a7d01..78260d5a7 100644 --- a/requirements-nover.txt +++ b/requirements-nover.txt @@ -78,3 +78,4 @@ xhtml2pdf recaptcha2 mockredispy yapf +pymemcache \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 36043a933..0372df258 100644 --- a/requirements.txt +++ 
b/requirements.txt @@ -108,6 +108,7 @@ pyjwkest==1.4.0 PyJWT==1.5.3 PyMySQL==0.6.7 pyOpenSSL==17.5.0 +pymemcache==1.4.3 pyparsing==2.2.0 PyPDF2==1.26.0 python-dateutil==2.6.1 diff --git a/test/testconfig.py b/test/testconfig.py index c3d564239..2ba731589 100644 --- a/test/testconfig.py +++ b/test/testconfig.py @@ -102,3 +102,7 @@ class TestConfig(DefaultConfig): TAG_EXPIRATION_OPTIONS = ['0s', '1s', '1d', '1w', '2w', '4w'] DEFAULT_NAMESPACE_MAXIMUM_BUILD_COUNT = None + + DATA_MODEL_CACHE_CONFIG = { + 'engine': 'inmemory', + } diff --git a/util/config/schema.py b/util/config/schema.py index 5bf7320ab..f9860946d 100644 --- a/util/config/schema.py +++ b/util/config/schema.py @@ -80,6 +80,8 @@ INTERNAL_ONLY_PROPERTIES = { 'SECURITY_SCANNER_READONLY_FAILOVER_ENDPOINTS', 'SECURITY_SCANNER_API_VERSION', + 'DATA_MODEL_CACHE_CONFIG', + # TODO: move this into the schema once we support signing in QE. 'FEATURE_SIGNING', 'TUF_SERVER',