Make health check failures report their reasons

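Note that the expanded service info (status plus failure reason) is added as a new `services_expanded` block, while the existing `services` block keeps its plain boolean values, to avoid breaking compatibility with existing callers of the health endpoint.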
This commit is contained in:
Joseph Schorr 2017-05-10 21:05:14 -07:00
parent e44a503bd0
commit 4ad3682b9c
4 changed files with 32 additions and 20 deletions

View file

@ -46,15 +46,29 @@ class HealthCheck(object):
is_healthy = True
notes = notes or []
service_statuses_bools = {}
service_status_expanded = {}
for service_name in service_statuses:
status, err = service_statuses[service_name]
service_statuses_bools[service_name] = status
service_status_expanded[service_name] = {
'status': status,
}
if not status:
service_status_expanded[service_name]['failure'] = err
if skip and service_name in skip:
notes.append('%s skipped in compute health' % service_name)
continue
is_healthy = is_healthy and service_statuses[service_name]
is_healthy = is_healthy and status
data = {
'services': service_statuses,
'services': service_statuses_bools,
'services_expanded': service_status_expanded,
'notes': notes,
'is_testing': self.app.config['TESTING'],
'config_provider': self.config_provider.provider_id,

View file

@ -21,10 +21,11 @@ def _check_registry_gunicorn(app):
registry_url = '%s://localhost%s/v1/_internal_ping' % (scheme, port)
try:
return client.get(registry_url, verify=False, timeout=2).status_code == 200
except Exception:
status_code = client.get(registry_url, verify=False, timeout=2).status_code
return (status_code == 200, 'Got non-200 response for registry: %s' % status_code)
except Exception as ex:
logger.exception('Exception when checking registry health: %s', registry_url)
return False
return (False, 'Exception when checking registry health: %s' % registry_url)
def _check_database(app):
@ -41,15 +42,14 @@ def _check_storage(app):
""" Returns the status of storage, as accessed from this instance. """
try:
storage.validate(storage.preferred_locations, app.config['HTTPCLIENT'])
return True
return (True, None)
except Exception as ex:
logger.exception('Storage check failed with exception %s', ex)
return False
return (False, 'Storage check failed with exception %s' % ex.message)
def _check_auth(app):
""" Returns the status of the auth engine, as accessed from this instance. """
(status, _) = authentication.ping()
return status
return authentication.ping()
_SERVICES = {