Make the DB health check first attempt a simple DB connection. If the database is in the middle of a failover, this will fail after 3 seconds (the connection timeout specified), rather than hanging and causing the ELB health checks to timeout and fail.
This commit is contained in:
parent
2a89accc49
commit
92d32bc636
3 changed files with 27 additions and 6 deletions
|
@ -70,6 +70,14 @@ read_slave = Proxy()
|
|||
db_random_func = CallableProxy()
|
||||
|
||||
|
||||
def validate_database_url(url, connect_timeout=5):
|
||||
driver = _db_from_url(url, {
|
||||
'connect_timeout': connect_timeout
|
||||
})
|
||||
driver.connect()
|
||||
driver.close()
|
||||
|
||||
|
||||
def _db_from_url(url, db_kwargs):
|
||||
parsed_url = make_url(url)
|
||||
|
||||
|
@ -82,6 +90,10 @@ def _db_from_url(url, db_kwargs):
|
|||
if parsed_url.password:
|
||||
db_kwargs['password'] = parsed_url.password
|
||||
|
||||
# Note: sqlite does not support connect_timeout.
|
||||
if parsed_url.drivername == 'sqlite' and 'connect_timeout' in db_kwargs:
|
||||
del db_kwargs['connect_timeout']
|
||||
|
||||
return SCHEME_DRIVERS[parsed_url.drivername](parsed_url.database, **db_kwargs)
|
||||
|
||||
|
||||
|
|
|
@ -14,7 +14,7 @@ from data.database import (User, Repository, Image, AccessToken, Role, Repositor
|
|||
ExternalNotificationEvent, ExternalNotificationMethod,
|
||||
RepositoryNotification, RepositoryAuthorizedEmail, TeamMemberInvite,
|
||||
DerivedImageStorage, ImageStorageTransformation, random_string_generator,
|
||||
db, BUILD_PHASE, QuayUserField)
|
||||
db, BUILD_PHASE, QuayUserField, validate_database_url)
|
||||
from peewee import JOIN_LEFT_OUTER, fn
|
||||
from util.validation import (validate_username, validate_email, validate_password,
|
||||
INVALID_PASSWORD_MESSAGE)
|
||||
|
@ -2257,11 +2257,20 @@ def delete_user(user):
|
|||
# TODO: also delete any repository data associated
|
||||
|
||||
|
||||
def check_health():
|
||||
def check_health(app_config):
|
||||
# Attempt to connect to the database first. If the DB is not responding,
|
||||
# using the validate_database_url will timeout quickly, as opposed to
|
||||
# making a normal connect which will just hang (thus breaking the health
|
||||
# check).
|
||||
try:
|
||||
validate_database_url(app_config['DB_URI'], connect_timeout=3)
|
||||
except Exception:
|
||||
logger.exception('Could not connect to the database')
|
||||
return False
|
||||
|
||||
# We will connect to the db, check that it contains some log entry kinds
|
||||
try:
|
||||
found_count = LogEntryKind.select().count()
|
||||
return found_count > 0
|
||||
return bool(list(LogEntryKind.select().limit(1)))
|
||||
except:
|
||||
return False
|
||||
|
||||
|
|
|
@ -161,7 +161,7 @@ def v1():
|
|||
def health():
|
||||
client = app.config['HTTPCLIENT']
|
||||
|
||||
db_healthy = model.check_health()
|
||||
db_healthy = model.check_health(app.config)
|
||||
buildlogs_healthy = build_logs.check_health()
|
||||
|
||||
hostname_parts = app.config['SERVER_HOSTNAME'].split(':')
|
||||
|
@ -187,7 +187,7 @@ def health():
|
|||
@web.route('/status', methods=['GET'])
|
||||
@no_cache
|
||||
def status():
|
||||
db_healthy = model.check_health()
|
||||
db_healthy = model.check_health(app.config)
|
||||
buildlogs_healthy = build_logs.check_health()
|
||||
|
||||
response = jsonify({
|
||||
|
|
Reference in a new issue