diff --git a/health/services.py b/health/services.py
index c5e6a3a51..5998292af 100644
--- a/health/services.py
+++ b/health/services.py
@@ -1,5 +1,5 @@
 import logging
-from app import build_logs, storage, authentication
+from app import build_logs, storage, authentication, instance_keys
 from health.models_pre_oci import pre_oci_model as model
 
 logger = logging.getLogger(__name__)
@@ -50,11 +50,36 @@ def _check_storage(app):
     logger.exception('Storage check failed with exception %s', ex)
     return (False, 'Storage check failed with exception %s' % ex.message)
 
+
 def _check_auth(app):
   """ Returns the status of the auth engine, as accessed from this instance. """
   return authentication.ping()
 
 
+def _check_service_key(app):
+  """ Returns the status of the service key for this instance. If the key has disappeared or
+      has expired, then will return False.
+  """
+  if not app.config.get('SETUP_COMPLETE', False):
+    return (True, 'Stack not fully setup')
+
+  try:
+    kid = instance_keys.local_key_id
+  except IOError as ex:
+    # Key has not been created yet.
+    return (True, 'Stack not fully setup')
+
+  try:
+    result = bool(instance_keys.get_service_key_public_key(kid))
+    return (result, 'Could not find valid instance service key %s' % kid)
+  except Exception as ex:
+    logger.exception('Got exception when trying to retrieve the instance key')
+
+    # NOTE: We return *True* here if there was an exception when retrieving the key, as it means
+    # the database is down, which will be handled by the database health check.
+    return (True, 'Failed to get instance key due to a database issue')
+
+
 _SERVICES = {
   'registry_gunicorn': _check_gunicorn('v1/_internal_ping'),
   'web_gunicorn': _check_gunicorn('_internal_ping'),
@@ -63,6 +88,7 @@ _SERVICES = {
   'redis': _check_redis,
   'storage': _check_storage,
   'auth': _check_auth,
+  'service_key': _check_service_key,
 }
 
 def check_all_services(app, skip):