From a4de476a8585112c13e9de13c49b510fdcd1b222 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Wed, 14 Jan 2015 23:39:58 -0500 Subject: [PATCH 1/3] Have the health check also ping the registry endpoint to make sure it is functional. --- endpoints/index.py | 5 +++++ endpoints/web.py | 12 +++++++++++- health/healthcheck.py | 14 ++++++++------ 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/endpoints/index.py b/endpoints/index.py index 00221b8cc..de45f2fde 100644 --- a/endpoints/index.py +++ b/endpoints/index.py @@ -380,6 +380,11 @@ def get_search(): resp.mimetype = 'application/json' return resp +# Note: This is *not* part of the Docker index spec. This is here for our own health check, +# since we have nginx handle the _ping below. +@index.route('/_internal_ping') +def internal_ping(): + return make_response('true', 200) @index.route('/_ping') @index.route('/_ping') diff --git a/endpoints/web.py b/endpoints/web.py index 519fc5c5e..c1ce6abbc 100644 --- a/endpoints/web.py +++ b/endpoints/web.py @@ -156,11 +156,21 @@ def v1(): @web.route('/health', methods=['GET']) @no_cache def health(): + client = app.config['HTTPCLIENT'] + db_healthy = model.check_health() buildlogs_healthy = build_logs.check_health() + hostname_parts = app.config['SERVER_HOSTNAME'].split(':') + port = '' + if len(hostname_parts) == 2: + port = ':' + hostname_parts[1] + + registry_url = '%s://localhost%s/v1/_internal_ping' % (app.config['PREFERRED_URL_SCHEME'], port) + registry_healthy = client.get(registry_url, verify=False).status_code == 200 + check = HealthCheck.get_check(app.config['HEALTH_CHECKER'][0], app.config['HEALTH_CHECKER'][1]) - (data, is_healthy) = check.conduct_healthcheck(db_healthy, buildlogs_healthy) + (data, is_healthy) = check.conduct_healthcheck(db_healthy, buildlogs_healthy, registry_healthy) response = jsonify(dict(data=data, is_healthy=is_healthy)) response.status_code = 200 if is_healthy else 503 diff --git a/health/healthcheck.py b/health/healthcheck.py index dc0ae7e6f..f5204dff7 100644 --- a/health/healthcheck.py +++ b/health/healthcheck.py @@ -7,7 +7,7 @@ class HealthCheck(object): def __init__(self): pass - def conduct_healthcheck(self, db_healthy, buildlogs_healthy): + def conduct_healthcheck(self, db_healthy, buildlogs_healthy, registry_healthy): """ Conducts any custom healthcheck work, returning a dict representing the HealthCheck output and a boolean indicating whether the instance is healthy. @@ -31,10 +31,11 @@ class LocalHealthCheck(HealthCheck): def check_name(cls): return 'LocalHealthCheck' - def conduct_healthcheck(self, db_healthy, buildlogs_healthy): + def conduct_healthcheck(self, db_healthy, buildlogs_healthy, registry_healthy): data = { 'db_healthy': db_healthy, - 'buildlogs_healthy': buildlogs_healthy + 'buildlogs_healthy': buildlogs_healthy, + 'registry_healthy': registry_healthy } return (data, db_healthy and buildlogs_healthy) @@ -49,10 +50,11 @@ class ProductionHealthCheck(HealthCheck): def check_name(cls): return 'ProductionHealthCheck' - def conduct_healthcheck(self, db_healthy, buildlogs_healthy): + def conduct_healthcheck(self, db_healthy, buildlogs_healthy, registry_healthy): data = { 'db_healthy': db_healthy, - 'buildlogs_healthy': buildlogs_healthy + 'buildlogs_healthy': buildlogs_healthy, + 'registry_healthy': registry_healthy } # Only report unhealthy if the machine cannot connect to the DB. Redis isn't required for @@ -81,4 +83,4 @@ class ProductionHealthCheck(HealthCheck): # requests once RDS comes back up. return (data, not is_rds_working) - return (data, db_healthy) + return (data, db_healthy and registry_healthy) From 93708d0131fb1ac467a571b132a4b5c7e9128d96 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Wed, 14 Jan 2015 23:41:30 -0500 Subject: [PATCH 2/3] Add the registry value to the other returned health value --- health/healthcheck.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/health/healthcheck.py b/health/healthcheck.py index f5204dff7..cc76c76c7 100644 --- a/health/healthcheck.py +++ b/health/healthcheck.py @@ -81,6 +81,6 @@ class ProductionHealthCheck(HealthCheck): # If RDS is down, then we still report the machine as healthy, so that it can handle # requests once RDS comes back up. - return (data, not is_rds_working) + return (data, not is_rds_working and registry_healthy) return (data, db_healthy and registry_healthy) From 2bae008bb178011c2507d112fa9ea798660e7f2f Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Fri, 16 Jan 2015 13:22:54 -0500 Subject: [PATCH 3/3] Add a timeout to the health check on the registry workers --- endpoints/web.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/endpoints/web.py b/endpoints/web.py index c1ce6abbc..fb08ea60e 100644 --- a/endpoints/web.py +++ b/endpoints/web.py @@ -167,7 +167,7 @@ def health(): port = ':' + hostname_parts[1] registry_url = '%s://localhost%s/v1/_internal_ping' % (app.config['PREFERRED_URL_SCHEME'], port) - registry_healthy = client.get(registry_url, verify=False).status_code == 200 + registry_healthy = client.get(registry_url, verify=False, timeout=2).status_code == 200 check = HealthCheck.get_check(app.config['HEALTH_CHECKER'][0], app.config['HEALTH_CHECKER'][1]) (data, is_healthy) = check.conduct_healthcheck(db_healthy, buildlogs_healthy, registry_healthy)