Add instance health checks for all gunicorn workers
Fixes https://jira.coreos.com/browse/QS-121
This commit is contained in:
parent
18f1ccf80b
commit
e91b83e1be
3 changed files with 36 additions and 21 deletions
|
@ -10,7 +10,7 @@ from auth.auth_context import get_authenticated_user
|
||||||
from auth.decorators import process_auth
|
from auth.decorators import process_auth
|
||||||
from auth.permissions import ReadRepositoryPermission
|
from auth.permissions import ReadRepositoryPermission
|
||||||
from data import database
|
from data import database
|
||||||
from endpoints.decorators import anon_protect, route_show_if, parse_repository_name
|
from endpoints.decorators import anon_protect, anon_allowed, route_show_if, parse_repository_name
|
||||||
from endpoints.verbs.models_pre_oci import pre_oci_model as model
|
from endpoints.verbs.models_pre_oci import pre_oci_model as model
|
||||||
from endpoints.v2.blob import BLOB_DIGEST_ROUTE
|
from endpoints.v2.blob import BLOB_DIGEST_ROUTE
|
||||||
from image.appc import AppCImageFormatter
|
from image.appc import AppCImageFormatter
|
||||||
|
@ -394,3 +394,9 @@ def get_tag_torrent(namespace_name, repo_name, digest):
|
||||||
|
|
||||||
metric_queue.repository_pull.Inc(labelvalues=[namespace_name, repo_name, 'torrent', True])
|
metric_queue.repository_pull.Inc(labelvalues=[namespace_name, repo_name, 'torrent', True])
|
||||||
return _torrent_for_blob(blob, repo_is_public)
|
return _torrent_for_blob(blob, repo_is_public)
|
||||||
|
|
||||||
|
|
||||||
|
@verbs.route('/_internal_ping')
@anon_allowed
def internal_ping():
    """ Liveness probe for the verbs workers: always answers 200 with the body 'true'. """
    response = make_response('true', 200)
    return response
|
||||||
|
|
|
@ -67,6 +67,10 @@ STATUS_TAGS = app.config['STATUS_TAGS']
|
||||||
def index(path, **kwargs):
|
def index(path, **kwargs):
|
||||||
return render_page_template_with_routedata('index.html', **kwargs)
|
return render_page_template_with_routedata('index.html', **kwargs)
|
||||||
|
|
||||||
|
@web.route('/_internal_ping')
@anon_allowed
def internal_ping():
    """ Liveness probe for the web workers: always answers 200 with the body 'true'. """
    response = make_response('true', 200)
    return response
|
||||||
|
|
||||||
@web.route('/500', methods=['GET'])
|
@web.route('/500', methods=['GET'])
|
||||||
def internal_error_display():
|
def internal_error_display():
|
||||||
|
|
|
@ -5,27 +5,30 @@ from health.models_pre_oci import pre_oci_model as model
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def _check_gunicorn(endpoint):
    """ Returns a health check function for the gunicorn workers serving `endpoint`.

    `endpoint` is the path of the worker's ping route, without a leading slash
    (e.g. 'v1/_internal_ping').

    The returned function takes the application object and returns a tuple of
    (healthy, message). `healthy` is True only when the ping route answered with
    a 200. The message is always populated, so it is only meaningful when
    `healthy` is False.
    """
    def fn(app):
        """ Returns the status of the gunicorn workers. """
        client = app.config['HTTPCLIENT']

        # Compute the URL for checking the endpoint. We append a port if and only if the
        # hostname contains one.
        hostname_parts = app.config['SERVER_HOSTNAME'].split(':')
        port = ''
        if len(hostname_parts) == 2:
            port = ':' + hostname_parts[1]

        # When EXTERNAL_TLS_TERMINATION is set the ping always goes over plain http,
        # regardless of the preferred scheme (presumably because TLS ends before the
        # request reaches this instance -- confirm against deployment docs).
        scheme = app.config['PREFERRED_URL_SCHEME']
        if app.config.get('EXTERNAL_TLS_TERMINATION', False):
            scheme = 'http'

        ping_url = '%s://localhost%s/%s' % (scheme, port, endpoint)
        try:
            status_code = client.get(ping_url, verify=False, timeout=2).status_code
            return (status_code == 200, 'Got non-200 response for worker: %s' % status_code)
        except Exception:
            # Any failure to reach the worker (connection refused, timeout, ...) is
            # reported as unhealthy rather than propagated to the caller.
            logger.exception('Exception when checking worker health: %s', ping_url)
            return (False, 'Exception when checking worker health: %s' % ping_url)

    return fn


# Kept so that references to the pre-refactor name continue to resolve.
_check_registry_gunicorn = _check_gunicorn('v1/_internal_ping')
|
||||||
|
|
||||||
|
|
||||||
def _check_database(app):
|
def _check_database(app):
|
||||||
|
@ -53,7 +56,9 @@ def _check_auth(app):
|
||||||
|
|
||||||
|
|
||||||
_SERVICES = {
|
_SERVICES = {
|
||||||
'registry_gunicorn': _check_registry_gunicorn,
|
'registry_gunicorn': _check_gunicorn('v1/_internal_ping'),
|
||||||
|
'web_gunicorn': _check_gunicorn('_internal_ping'),
|
||||||
|
'verbs_gunicorn': _check_gunicorn('c1/_internal_ping'),
|
||||||
'database': _check_database,
|
'database': _check_database,
|
||||||
'redis': _check_redis,
|
'redis': _check_redis,
|
||||||
'storage': _check_storage,
|
'storage': _check_storage,
|
||||||
|
|
Reference in a new issue