quay/health/healthcheck.py

import boto.rds2
import logging

logger = logging.getLogger(__name__)

class HealthCheck(object):
  """
  Base class for the configurable health checks. Direct subclasses are looked up by the value
  returned from their `check_name()` classmethod (see `get_check`).
  """

  def __init__(self):
    """ The base check takes no configuration. """

  def conduct_healthcheck(self, db_healthy, buildlogs_healthy):
    """
    Performs the check-specific work. Implementations return a dict describing the health
    check output together with a boolean indicating whether the instance is healthy.

    The base implementation always raises NotImplementedError.
    """
    raise NotImplementedError()

  @classmethod
  def get_check(cls, name, parameters):
    """
    Instantiates the health check whose `check_name()` equals `name`.

    Only direct subclasses of `cls` are considered, in the order reported by
    `__subclasses__()`; the first match is constructed with `parameters` expanded as
    keyword arguments. Raises Exception if no subclass matches.
    """
    # Lazy scan: stops calling check_name() as soon as the first match is found.
    candidates = (klass for klass in cls.__subclasses__() if klass.check_name() == name)
    found = next(candidates, None)
    if found is None:
      raise Exception('Unknown health check with name %s' % name)

    return found(**parameters)


class LocalHealthCheck(HealthCheck):
  """
  Health check that reports the instance as healthy only when both the database and the
  build logs checks passed.
  """

  def __init__(self):
    """ This check takes no configuration. """

  @classmethod
  def check_name(cls):
    """ Returns the name under which this check is looked up. """
    return 'LocalHealthCheck'

  def conduct_healthcheck(self, db_healthy, buildlogs_healthy):
    """
    Returns a tuple of (report dict, health value). The report echoes both inputs; the
    health value is `db_healthy and buildlogs_healthy`.
    """
    overall = db_healthy and buildlogs_healthy
    report = dict(db_healthy=db_healthy, buildlogs_healthy=buildlogs_healthy)
    return (report, overall)


class ProductionHealthCheck(HealthCheck):
  """
  Health check that only reports the instance as unhealthy when the database check failed
  while RDS itself reports as available. The build logs status is included in the report
  but never affects the health result.
  """

  def __init__(self, access_key, secret_key):
    # AWS credentials used to query RDS when the database check fails.
    self.access_key = access_key
    self.secret_key = secret_key

  @classmethod
  def check_name(cls):
    """ Returns the name under which this check is looked up. """
    return 'ProductionHealthCheck'

  def conduct_healthcheck(self, db_healthy, buildlogs_healthy):
    """
    Returns a tuple of (report dict, health value).

    The report always contains `db_healthy` and `buildlogs_healthy`. When `db_healthy` is
    falsy, RDS is queried directly and the report additionally contains
    `db_available_checked` (always True) and `db_available_status` (whether the first
    reported DB instance has status 'available'). In that case the health value is
    `not db_available_status`; otherwise it is `db_healthy` itself.
    """
    data = {
      'db_healthy': db_healthy,
      'buildlogs_healthy': buildlogs_healthy
    }

    # Only report unhealthy if the machine cannot connect to the DB. Redis isn't required for
    # mission critical/high availability operations.
    if not db_healthy:
      # If the database is marked as unhealthy, check the status of RDS directly. If RDS is
      # reporting as available, then the problem is with this instance. Otherwise, the problem is
      # with RDS, and we can keep this machine as 'healthy'.
      is_rds_working = False
      try:
        region = boto.rds2.connect_to_region('us-east-1',
          aws_access_key_id=self.access_key, aws_secret_access_key=self.secret_key)
        response = region.describe_db_instances()['DescribeDBInstancesResponse']
        result = response['DescribeDBInstancesResult']
        instances = result['DBInstances']
        status = instances[0]['DBInstanceStatus']
        is_rds_working = status == 'available'
      except Exception:
        # Best effort: any failure while querying RDS (connection problems, unexpected
        # response shape, no instances) is logged and treated as "RDS not available".
        # Deliberately not a bare `except:` so that SystemExit/KeyboardInterrupt still
        # propagate instead of being reported as an RDS outage.
        logger.exception("Exception while checking RDS status")

      data['db_available_checked'] = True
      data['db_available_status'] = is_rds_working

      # If RDS is down, then we still report the machine as healthy, so that it can handle
      # requests once RDS comes back up.
      return (data, not is_rds_working)

    return (data, db_healthy)
Add a new configurable health check, to make sure production instances are not taken down by Redis or non-local DB issues 2014-11-02 20:06:17 +00:00			`import boto.rds2`
			`import logging`

			`logger = logging.getLogger(__name__)`

			`class HealthCheck(object):`
			`def __init__(self):`
			`pass`

			`def conduct_healthcheck(self, db_healthy, buildlogs_healthy):`
			`"""`
			`Conducts any custom healthcheck work, returning a dict representing the HealthCheck`
			`output and a boolean indicating whether the instance is healthy.`
			`"""`
Strip whitespace from ALL the things. 2014-11-24 21:07:38 +00:00			`raise NotImplementedError`
Add a new configurable health check, to make sure production instances are not taken down by Redis or non-local DB issues 2014-11-02 20:06:17 +00:00
			`@classmethod`
			`def get_check(cls, name, parameters):`
			`for subc in cls.__subclasses__():`
			`if subc.check_name() == name:`
			`return subc(**parameters)`

			`raise Exception('Unknown health check with name %s' % name)`


			`class LocalHealthCheck(HealthCheck):`
			`def __init__(self):`
			`pass`

			`@classmethod`
			`def check_name(cls):`
			`return 'LocalHealthCheck'`

			`def conduct_healthcheck(self, db_healthy, buildlogs_healthy):`
			`data = {`
			`'db_healthy': db_healthy,`
			`'buildlogs_healthy': buildlogs_healthy`
			`}`

			`return (data, db_healthy and buildlogs_healthy)`


			`class ProductionHealthCheck(HealthCheck):`
			`def __init__(self, access_key, secret_key):`
			`self.access_key = access_key`
			`self.secret_key = secret_key`
Strip whitespace from ALL the things. 2014-11-24 21:07:38 +00:00
Add a new configurable health check, to make sure production instances are not taken down by Redis or non-local DB issues 2014-11-02 20:06:17 +00:00			`@classmethod`
			`def check_name(cls):`
			`return 'ProductionHealthCheck'`

Strip whitespace from ALL the things. 2014-11-24 21:07:38 +00:00			`def conduct_healthcheck(self, db_healthy, buildlogs_healthy):`
Add a new configurable health check, to make sure production instances are not taken down by Redis or non-local DB issues 2014-11-02 20:06:17 +00:00			`data = {`
			`'db_healthy': db_healthy,`
			`'buildlogs_healthy': buildlogs_healthy`
			`}`

			`# Only report unhealthy if the machine cannot connect to the DB. Redis isn't required for`
			`# mission critical/high avaliability operations.`
			`if not db_healthy:`
			`# If the database is marked as unhealthy, check the status of RDS directly. If RDS is`
			`# reporting as available, then the problem is with this instance. Otherwise, the problem is`
			`# with RDS, and we can keep this machine as 'healthy'.`
			`is_rds_working = False`
			`try:`
			`region = boto.rds2.connect_to_region('us-east-1',`
			`aws_access_key_id=self.access_key, aws_secret_access_key=self.secret_key)`
			`response = region.describe_db_instances()['DescribeDBInstancesResponse']`
			`result = response['DescribeDBInstancesResult']`
			`instances = result['DBInstances']`
			`status = instances[0]['DBInstanceStatus']`
			`is_rds_working = status == 'available'`
			`except:`
			`logger.exception("Exception while checking RDS status")`
			`pass`

			`data['db_available_checked'] = True`
			`data['db_available_status'] = is_rds_working`

Clarify the health checking logic and remove the accidental inclusion of the override 2014-11-02 20:42:59 +00:00			`# If RDS is down, then we still report the machine as healthy, so that it can handle`
			`# requests once RDS comes back up.`
			`return (data, not is_rds_working)`
Add a new configurable health check, to make sure production instances are not taken down by Redis or non-local DB issues 2014-11-02 20:06:17 +00:00
Strip whitespace from ALL the things. 2014-11-24 21:07:38 +00:00			`return (data, db_healthy)`