Add a health check for the instance key

If the instance key expires or disappears, the node now reports itself as unhealthy, which takes it out of service and prevents downtime.
This commit is contained in:
Joseph Schorr 2017-11-10 13:47:19 -05:00 committed by Joseph Schorr
parent 6514bf229f
commit c1cc52f58b

View file

@ -1,5 +1,5 @@
import logging
from app import build_logs, storage, authentication
from app import build_logs, storage, authentication, instance_keys
from health.models_pre_oci import pre_oci_model as model
logger = logging.getLogger(__name__)
@ -50,11 +50,36 @@ def _check_storage(app):
logger.exception('Storage check failed with exception %s', ex)
return (False, 'Storage check failed with exception %s' % ex.message)
def _check_auth(app):
    """ Reports the health of the auth engine as seen from this instance.

    Delegates to `authentication.ping()` and hands its result back unchanged;
    the `app` argument is accepted but not used here.
    """
    auth_status = authentication.ping()
    return auth_status
def _check_service_key(app):
    """ Returns the status of the service key for this instance. If the key has disappeared or
    has expired, then will return False.

    The result is a `(healthy, message)` tuple:

    * `(True, 'Stack not fully setup')` when `SETUP_COMPLETE` is not set in the app config, or
      when reading the local key ID raises `IOError`.
    * `(True, None)` when a public key is found for the local key ID.
    * `(False, 'Could not find valid instance service key <kid>')` when the lookup succeeds
      but yields no public key.
    * `(True, 'Failed to get instance key due to a database issue')` when the lookup itself
      raises; see the note in the handler for why this is reported as healthy.
    """
    if not app.config.get('SETUP_COMPLETE', False):
        return (True, 'Stack not fully setup')

    try:
        kid = instance_keys.local_key_id
    except IOError:
        # Key has not been created yet.
        return (True, 'Stack not fully setup')

    # Only the lookup is guarded, so the broad handler below cannot mask unrelated errors.
    try:
        key_is_valid = bool(instance_keys.get_service_key_public_key(kid))
    except Exception:
        logger.exception('Got exception when trying to retrieve the instance key')

        # NOTE: We return *True* here if there was an exception when retrieving the key, as it means
        # the database is down, which will be handled by the database health check.
        return (True, 'Failed to get instance key due to a database issue')

    if key_is_valid:
        # A passing check must not carry the "could not find" failure message.
        return (True, None)

    return (False, 'Could not find valid instance service key %s' % kid)
_SERVICES = {
'registry_gunicorn': _check_gunicorn('v1/_internal_ping'),
'web_gunicorn': _check_gunicorn('_internal_ping'),
@ -63,6 +88,7 @@ _SERVICES = {
'redis': _check_redis,
'storage': _check_storage,
'auth': _check_auth,
'service_key': _check_service_key,
}
def check_all_services(app, skip):