Make the DB health check first attempt a simple DB connection. If the database is in the middle of a failover, this attempt will fail after 3 seconds (the specified connection timeout), rather than hanging and causing the ELB health checks to time out and fail.
This commit is contained in:
parent
2a89accc49
commit
92d32bc636
3 changed files with 27 additions and 6 deletions
|
@ -70,6 +70,14 @@ read_slave = Proxy()
|
||||||
db_random_func = CallableProxy()
|
db_random_func = CallableProxy()
|
||||||
|
|
||||||
|
|
||||||
|
def validate_database_url(url, connect_timeout=5):
|
||||||
|
driver = _db_from_url(url, {
|
||||||
|
'connect_timeout': connect_timeout
|
||||||
|
})
|
||||||
|
driver.connect()
|
||||||
|
driver.close()
|
||||||
|
|
||||||
|
|
||||||
def _db_from_url(url, db_kwargs):
|
def _db_from_url(url, db_kwargs):
|
||||||
parsed_url = make_url(url)
|
parsed_url = make_url(url)
|
||||||
|
|
||||||
|
@ -82,6 +90,10 @@ def _db_from_url(url, db_kwargs):
|
||||||
if parsed_url.password:
|
if parsed_url.password:
|
||||||
db_kwargs['password'] = parsed_url.password
|
db_kwargs['password'] = parsed_url.password
|
||||||
|
|
||||||
|
# Note: sqlite does not support connect_timeout.
|
||||||
|
if parsed_url.drivername == 'sqlite' and 'connect_timeout' in db_kwargs:
|
||||||
|
del db_kwargs['connect_timeout']
|
||||||
|
|
||||||
return SCHEME_DRIVERS[parsed_url.drivername](parsed_url.database, **db_kwargs)
|
return SCHEME_DRIVERS[parsed_url.drivername](parsed_url.database, **db_kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -14,7 +14,7 @@ from data.database import (User, Repository, Image, AccessToken, Role, Repositor
|
||||||
ExternalNotificationEvent, ExternalNotificationMethod,
|
ExternalNotificationEvent, ExternalNotificationMethod,
|
||||||
RepositoryNotification, RepositoryAuthorizedEmail, TeamMemberInvite,
|
RepositoryNotification, RepositoryAuthorizedEmail, TeamMemberInvite,
|
||||||
DerivedImageStorage, ImageStorageTransformation, random_string_generator,
|
DerivedImageStorage, ImageStorageTransformation, random_string_generator,
|
||||||
db, BUILD_PHASE, QuayUserField)
|
db, BUILD_PHASE, QuayUserField, validate_database_url)
|
||||||
from peewee import JOIN_LEFT_OUTER, fn
|
from peewee import JOIN_LEFT_OUTER, fn
|
||||||
from util.validation import (validate_username, validate_email, validate_password,
|
from util.validation import (validate_username, validate_email, validate_password,
|
||||||
INVALID_PASSWORD_MESSAGE)
|
INVALID_PASSWORD_MESSAGE)
|
||||||
|
@ -2257,11 +2257,20 @@ def delete_user(user):
|
||||||
# TODO: also delete any repository data associated
|
# TODO: also delete any repository data associated
|
||||||
|
|
||||||
|
|
||||||
def check_health():
|
def check_health(app_config):
|
||||||
|
# Attempt to connect to the database first. If the DB is not responding,
|
||||||
|
# using validate_database_url will time out quickly, as opposed to
|
||||||
|
# making a normal connect which will just hang (thus breaking the health
|
||||||
|
# check).
|
||||||
|
try:
|
||||||
|
validate_database_url(app_config['DB_URI'], connect_timeout=3)
|
||||||
|
except Exception:
|
||||||
|
logger.exception('Could not connect to the database')
|
||||||
|
return False
|
||||||
|
|
||||||
# We will connect to the db, check that it contains some log entry kinds
|
# We will connect to the db, check that it contains some log entry kinds
|
||||||
try:
|
try:
|
||||||
found_count = LogEntryKind.select().count()
|
return bool(list(LogEntryKind.select().limit(1)))
|
||||||
return found_count > 0
|
|
||||||
except:
|
except:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
|
@ -161,7 +161,7 @@ def v1():
|
||||||
def health():
|
def health():
|
||||||
client = app.config['HTTPCLIENT']
|
client = app.config['HTTPCLIENT']
|
||||||
|
|
||||||
db_healthy = model.check_health()
|
db_healthy = model.check_health(app.config)
|
||||||
buildlogs_healthy = build_logs.check_health()
|
buildlogs_healthy = build_logs.check_health()
|
||||||
|
|
||||||
hostname_parts = app.config['SERVER_HOSTNAME'].split(':')
|
hostname_parts = app.config['SERVER_HOSTNAME'].split(':')
|
||||||
|
@ -187,7 +187,7 @@ def health():
|
||||||
@web.route('/status', methods=['GET'])
|
@web.route('/status', methods=['GET'])
|
||||||
@no_cache
|
@no_cache
|
||||||
def status():
|
def status():
|
||||||
db_healthy = model.check_health()
|
db_healthy = model.check_health(app.config)
|
||||||
buildlogs_healthy = build_logs.check_health()
|
buildlogs_healthy = build_logs.check_health()
|
||||||
|
|
||||||
response = jsonify({
|
response = jsonify({
|
||||||
|
|
Reference in a new issue