2014-11-02 20:06:17 +00:00
|
|
|
import boto.rds2
|
|
|
|
import logging
|
|
|
|
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
class HealthCheck(object):
    """
    Base class for health checks.

    Subclasses are looked up by name through get_check, which calls check_name() on each
    direct subclass, so every subclass is expected to provide a check_name classmethod and
    to override conduct_healthcheck.
    """

    def __init__(self):
        pass

    def conduct_healthcheck(self, db_healthy, buildlogs_healthy):
        """
        Conducts any custom healthcheck work, returning a dict representing the HealthCheck
        output and a boolean indicating whether the instance is healthy.
        """
        raise NotImplementedError

    @classmethod
    def get_check(cls, name, parameters):
        """
        Instantiates the direct subclass whose check_name() equals `name`, passing
        `parameters` to its constructor as keyword arguments.

        `parameters` may be None or empty, in which case the subclass is constructed
        without arguments. Raises Exception if no direct subclass matches `name`.
        """
        # A missing configuration block must not blow up with a TypeError from `**None`.
        kwargs = parameters or {}

        # NOTE: __subclasses__() only lists direct subclasses; deeper descendants are not searched.
        for subc in cls.__subclasses__():
            if subc.check_name() == name:
                return subc(**kwargs)

        raise Exception('Unknown health check with name %s' % name)
|
|
|
|
|
|
|
|
|
|
|
|
class LocalHealthCheck(HealthCheck):
    """
    Health check that relies only on the statuses handed to it, without consulting any
    external service.
    """

    def __init__(self):
        """
        Takes no configuration.
        """

    @classmethod
    def check_name(cls):
        """
        Returns the name this check is selected by.
        """
        return 'LocalHealthCheck'

    def conduct_healthcheck(self, db_healthy, buildlogs_healthy):
        """
        Reports both statuses unchanged, together with `db_healthy and buildlogs_healthy` as
        the overall health of the instance.
        """
        overall = db_healthy and buildlogs_healthy
        report = dict(db_healthy=db_healthy, buildlogs_healthy=buildlogs_healthy)
        return (report, overall)
|
|
|
|
|
|
|
|
|
|
|
|
class ProductionHealthCheck(HealthCheck):
    """
    Health check that, when the database is reported unhealthy, asks RDS directly for its
    status in order to tell a problem with this instance apart from an RDS outage.
    """

    def __init__(self, access_key, secret_key):
        """
        Stores the AWS credentials used when querying RDS.
        """
        self.access_key = access_key
        self.secret_key = secret_key

    @classmethod
    def check_name(cls):
        """
        Returns the name this check is selected by.
        """
        return 'ProductionHealthCheck'

    def _is_rds_available(self):
        """
        Returns True only if the first DB instance listed by RDS reports the status
        'available'. Any failure to determine the status (connection problem, unexpected
        response shape, empty instance list) is logged and reported as False.
        """
        try:
            region = boto.rds2.connect_to_region('us-east-1',
                                                 aws_access_key_id=self.access_key,
                                                 aws_secret_access_key=self.secret_key)
            response = region.describe_db_instances()['DescribeDBInstancesResponse']
            result = response['DescribeDBInstancesResult']
            instances = result['DBInstances']
            status = instances[0]['DBInstanceStatus']
            return status == 'available'
        except Exception:
            # Deliberately best-effort, but narrower than a bare `except:` so that
            # KeyboardInterrupt and SystemExit still propagate.
            logger.exception("Exception while checking RDS status")
            return False

    def conduct_healthcheck(self, db_healthy, buildlogs_healthy):
        """
        Returns a tuple of (status dict, healthy flag).

        The dict always carries 'db_healthy' and 'buildlogs_healthy'. When the database is
        unhealthy it also carries 'db_available_checked' (always True) and
        'db_available_status' (whether RDS reported itself as available). The build logs
        status is reported but does not influence the healthy flag.
        """
        data = {
            'db_healthy': db_healthy,
            'buildlogs_healthy': buildlogs_healthy
        }

        # Only report unhealthy if the machine cannot connect to the DB. Redis isn't required for
        # mission critical/high availability operations.
        if not db_healthy:
            # If the database is marked as unhealthy, check the status of RDS directly. If RDS is
            # reporting as available, then the problem is with this instance. Otherwise, the problem is
            # with RDS, and we can keep this machine as 'healthy'.
            is_rds_working = self._is_rds_available()

            data['db_available_checked'] = True
            data['db_available_status'] = is_rds_working

            # If RDS is down, then we still report the machine as healthy, so that it can handle
            # requests once RDS comes back up.
            return (data, not is_rds_working)

        return (data, db_healthy)
|