From 92d32bc6364a4808d112c6d679aaf375515763e6 Mon Sep 17 00:00:00 2001 From: Joseph Schorr Date: Tue, 20 Jan 2015 14:46:22 -0500 Subject: [PATCH] Make the DB health check first attempt a simple DB connection. If the database is in the middle of a failover, this will fail after 3 seconds (the connection timeout specified), rather than hanging and causing the ELB health checks to time out and fail. --- data/database.py | 12 ++++++++++++ data/model/legacy.py | 17 +++++++++++++---- endpoints/web.py | 4 ++-- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/data/database.py b/data/database.py index 84d5cd879..aba8a578d 100644 --- a/data/database.py +++ b/data/database.py @@ -70,6 +70,14 @@ read_slave = Proxy() db_random_func = CallableProxy() +def validate_database_url(url, connect_timeout=5): + driver = _db_from_url(url, { + 'connect_timeout': connect_timeout + }) + driver.connect() + driver.close() + + def _db_from_url(url, db_kwargs): parsed_url = make_url(url) @@ -82,6 +90,10 @@ def _db_from_url(url, db_kwargs): if parsed_url.password: db_kwargs['password'] = parsed_url.password + # Note: sqlite does not support connect_timeout. 
+ if parsed_url.drivername == 'sqlite' and 'connect_timeout' in db_kwargs: + del db_kwargs['connect_timeout'] + return SCHEME_DRIVERS[parsed_url.drivername](parsed_url.database, **db_kwargs) diff --git a/data/model/legacy.py b/data/model/legacy.py index a5c779871..f8c04e04c 100644 --- a/data/model/legacy.py +++ b/data/model/legacy.py @@ -14,7 +14,7 @@ from data.database import (User, Repository, Image, AccessToken, Role, Repositor ExternalNotificationEvent, ExternalNotificationMethod, RepositoryNotification, RepositoryAuthorizedEmail, TeamMemberInvite, DerivedImageStorage, ImageStorageTransformation, random_string_generator, - db, BUILD_PHASE, QuayUserField) + db, BUILD_PHASE, QuayUserField, validate_database_url) from peewee import JOIN_LEFT_OUTER, fn from util.validation import (validate_username, validate_email, validate_password, INVALID_PASSWORD_MESSAGE) @@ -2257,11 +2257,20 @@ def delete_user(user): # TODO: also delete any repository data associated -def check_health(): +def check_health(app_config): + # Attempt to connect to the database first. If the DB is not responding, + # using the validate_database_url will timeout quickly, as opposed to + # making a normal connect which will just hang (thus breaking the health + # check). 
+ try: + validate_database_url(app_config['DB_URI'], connect_timeout=3) + except Exception: + logger.exception('Could not connect to the database') + return False + # We will connect to the db, check that it contains some log entry kinds try: - found_count = LogEntryKind.select().count() - return found_count > 0 + return bool(list(LogEntryKind.select().limit(1))) except: return False diff --git a/endpoints/web.py b/endpoints/web.py index 439e5c3fe..b91b3745e 100644 --- a/endpoints/web.py +++ b/endpoints/web.py @@ -161,7 +161,7 @@ def v1(): def health(): client = app.config['HTTPCLIENT'] - db_healthy = model.check_health() + db_healthy = model.check_health(app.config) buildlogs_healthy = build_logs.check_health() hostname_parts = app.config['SERVER_HOSTNAME'].split(':') @@ -187,7 +187,7 @@ def health(): @web.route('/status', methods=['GET']) @no_cache def status(): - db_healthy = model.check_health() + db_healthy = model.check_health(app.config) buildlogs_healthy = build_logs.check_health() response = jsonify({