Add a health check for the instance key
If the key expires or disappears, the node now reports itself as unhealthy, which takes it out of service and prevents downtime.
This commit is contained in:
parent
6514bf229f
commit
c1cc52f58b
1 changed files with 27 additions and 1 deletions
|
@ -1,5 +1,5 @@
|
||||||
import logging
|
import logging
|
||||||
from app import build_logs, storage, authentication
|
from app import build_logs, storage, authentication, instance_keys
|
||||||
from health.models_pre_oci import pre_oci_model as model
|
from health.models_pre_oci import pre_oci_model as model
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
@ -50,11 +50,36 @@ def _check_storage(app):
|
||||||
logger.exception('Storage check failed with exception %s', ex)
|
logger.exception('Storage check failed with exception %s', ex)
|
||||||
return (False, 'Storage check failed with exception %s' % ex.message)
|
return (False, 'Storage check failed with exception %s' % ex.message)
|
||||||
|
|
||||||
|
|
||||||
def _check_auth(app):
    """ Reports the auth engine's status as seen from this instance.

    The `app` argument is accepted for signature parity with the other checks
    but is not consulted; the result of `authentication.ping()` is returned
    unchanged.
    """
    auth_status = authentication.ping()
    return auth_status
|
||||||
|
|
||||||
|
|
||||||
|
def _check_service_key(app):
    """ Returns the status of the service key for this instance.

    The result is a `(healthy, message)` tuple. The check is skipped (and
    reported as healthy) when `SETUP_COMPLETE` is not set in `app.config`, or
    when reading the local key id raises IOError because the key has not been
    created yet. Otherwise the check is healthy only if
    `instance_keys.get_service_key_public_key` returns a truthy value for the
    local key id; if the key has disappeared or has expired, the first element
    of the tuple is False.
    """
    if not app.config.get('SETUP_COMPLETE', False):
        return (True, 'Stack not fully setup')

    try:
        kid = instance_keys.local_key_id
    except IOError:
        # Key has not been created yet.
        return (True, 'Stack not fully setup')

    # Keep the try body limited to the lookup itself so that only lookup
    # failures are treated as a database problem.
    try:
        key_is_valid = bool(instance_keys.get_service_key_public_key(kid))
    except Exception:
        logger.exception('Got exception when trying to retrieve the instance key')

        # NOTE: We return *True* here if there was an exception when retrieving the key, as it means
        # the database is down, which will be handled by the database health check.
        return (True, 'Failed to get instance key due to a database issue')

    if not key_is_valid:
        return (False, 'Could not find valid instance service key %s' % kid)

    # Report a success message on success, rather than reusing the failure text.
    return (True, 'Found valid instance service key %s' % kid)
|
||||||
|
|
||||||
|
|
||||||
_SERVICES = {
|
_SERVICES = {
|
||||||
'registry_gunicorn': _check_gunicorn('v1/_internal_ping'),
|
'registry_gunicorn': _check_gunicorn('v1/_internal_ping'),
|
||||||
'web_gunicorn': _check_gunicorn('_internal_ping'),
|
'web_gunicorn': _check_gunicorn('_internal_ping'),
|
||||||
|
@ -63,6 +88,7 @@ _SERVICES = {
|
||||||
'redis': _check_redis,
|
'redis': _check_redis,
|
||||||
'storage': _check_storage,
|
'storage': _check_storage,
|
||||||
'auth': _check_auth,
|
'auth': _check_auth,
|
||||||
|
'service_key': _check_service_key,
|
||||||
}
|
}
|
||||||
|
|
||||||
def check_all_services(app, skip):
|
def check_all_services(app, skip):
|
||||||
|
|
Reference in a new issue