quay/buildman/manager/executor.py

import datetime
import hashlib
import logging
import os
import socket
import subprocess
import threading
import uuid

from functools import partial

import boto.ec2
import cachetools.func
import requests
import trollius

from container_cloud_config import CloudConfigContext
from jinja2 import FileSystemLoader, Environment
from trollius import coroutine, From, Return, get_event_loop

import release

from buildman.asyncutil import AsyncWrapper
from app import metric_queue, app
from util.metrics.metricqueue import duration_collector_async
from _init import ROOT_DIR


logger = logging.getLogger(__name__)


ONE_HOUR = 60*60

_TAG_RETRY_COUNT = 3 # Number of times to retry adding tags.
_TAG_RETRY_SLEEP = 2 # Number of seconds to wait between tag retries.

ENV = Environment(loader=FileSystemLoader(os.path.join(ROOT_DIR, "buildman/templates")))
TEMPLATE = ENV.get_template('cloudconfig.yaml')
CloudConfigContext().populate_jinja_environment(ENV)

class ExecutorException(Exception):
  """ Exception raised when there is a problem starting or stopping a builder.
  """
  pass


class BuilderExecutor(object):
  def __init__(self, executor_config, manager_hostname):
    """ Interface which can be plugged into the EphemeralNodeManager to provide a strategy for
        starting and stopping builders.
    """
    self.executor_config = executor_config
    self.manager_hostname = manager_hostname

    default_websocket_scheme = 'wss' if app.config['PREFERRED_URL_SCHEME'] == 'https' else 'ws'
    self.websocket_scheme = executor_config.get("WEBSOCKET_SCHEME", default_websocket_scheme)

  @property
  def name(self):
    """ Name returns the unique name for this executor. """
    return self.executor_config.get('NAME') or self.__class__.__name__

  @property
  def setup_time(self):
    """ Returns the amount of time (in seconds) to wait for the execution to start for the build.
        If None, the manager's default will be used.
    """
    return self.executor_config.get('SETUP_TIME')

  @coroutine
  def start_builder(self, realm, token, build_uuid):
    """ Create a builder with the specified config. Returns a unique id which can be used to manage
        the builder.
    """
    raise NotImplementedError

  @coroutine
  def stop_builder(self, builder_id):
    """ Stop a builder which is currently running.
    """
    raise NotImplementedError

  def allowed_for_namespace(self, namespace):
    """ Returns true if this executor can be used for builds in the given namespace. """

    # Check for an explicit namespace whitelist.
    namespace_whitelist = self.executor_config.get('NAMESPACE_WHITELIST')
    if namespace_whitelist is not None and namespace in namespace_whitelist:
      return True

    # Check for a staged rollout percentage. If found, we hash the namespace and, if it is found
    # in the first X% of the character space, we allow this executor to be used.
    staged_rollout = self.executor_config.get('STAGED_ROLLOUT')
    if staged_rollout is not None:
      bucket = int(hashlib.sha256(namespace).hexdigest()[-2:], 16)
      return bucket < (256 * staged_rollout)

    # If there are no restrictions in place, we are free to use this executor.
    return staged_rollout is None and namespace_whitelist is None

  @property
  def minimum_retry_threshold(self):
    """ Returns the minimum number of retries required for this executor to be used or 0 if
        none. """
    return self.executor_config.get('MINIMUM_RETRY_THRESHOLD', 0)

  def generate_cloud_config(self, realm, token, build_uuid, coreos_channel,
                            manager_hostname, quay_username=None,
                            quay_password=None):
    if quay_username is None:
      quay_username = self.executor_config['QUAY_USERNAME']

    if quay_password is None:
      quay_password = self.executor_config['QUAY_PASSWORD']

    return TEMPLATE.render(
      realm=realm,
      token=token,
      build_uuid=build_uuid,
      quay_username=quay_username,
      quay_password=quay_password,
      manager_hostname=manager_hostname,
      websocket_scheme=self.websocket_scheme,
      coreos_channel=coreos_channel,
      worker_image=self.executor_config.get('WORKER_IMAGE', 'quay.io/coreos/registry-build-worker'),
      worker_tag=self.executor_config['WORKER_TAG'],
      logentries_token=self.executor_config.get('LOGENTRIES_TOKEN', None),
      volume_size=self.executor_config.get('VOLUME_SIZE', '42G'),
      max_lifetime_s=self.executor_config.get('MAX_LIFETIME_S', 10800),
      ssh_authorized_keys=self.executor_config.get('SSH_AUTHORIZED_KEYS', []),
    )


class EC2Executor(BuilderExecutor):
  """ Implementation of BuilderExecutor which uses libcloud to start machines on a variety of cloud
      providers.
  """
  COREOS_STACK_URL = 'http://%s.release.core-os.net/amd64-usr/current/coreos_production_ami_hvm.txt'

  def __init__(self, *args, **kwargs):
    self._loop = get_event_loop()
    super(EC2Executor, self).__init__(*args, **kwargs)

  def _get_conn(self):
    """ Creates an ec2 connection which can be used to manage instances.
    """
    return AsyncWrapper(boto.ec2.connect_to_region(
      self.executor_config['EC2_REGION'],
      aws_access_key_id=self.executor_config['AWS_ACCESS_KEY'],
      aws_secret_access_key=self.executor_config['AWS_SECRET_KEY'],
    ))

  @classmethod
  @cachetools.func.ttl_cache(ttl=ONE_HOUR)
  def _get_coreos_ami(cls, ec2_region, coreos_channel):
    """ Retrieve the CoreOS AMI id from the canonical listing.
    """
    stack_list_string = requests.get(EC2Executor.COREOS_STACK_URL % coreos_channel).text
    stack_amis = dict([stack.split('=') for stack in stack_list_string.split('|')])
    return stack_amis[ec2_region]

  @coroutine
  @duration_collector_async(metric_queue.builder_time_to_start, ['ec2'])
  def start_builder(self, realm, token, build_uuid):
    region = self.executor_config['EC2_REGION']
    channel = self.executor_config.get('COREOS_CHANNEL', 'stable')

    coreos_ami = self.executor_config.get('COREOS_AMI', None)
    if coreos_ami is None:
      get_ami_callable = partial(self._get_coreos_ami, region, channel)
      coreos_ami = yield From(self._loop.run_in_executor(None, get_ami_callable))

    user_data = self.generate_cloud_config(realm, token, build_uuid, channel, self.manager_hostname)
    logger.debug('Generated cloud config for build %s: %s', build_uuid, user_data)

    ec2_conn = self._get_conn()

    ssd_root_ebs = boto.ec2.blockdevicemapping.BlockDeviceType(
      size=int(self.executor_config.get('BLOCK_DEVICE_SIZE', 48)),
      volume_type='gp2',
      delete_on_termination=True,
    )
    block_devices = boto.ec2.blockdevicemapping.BlockDeviceMapping()
    block_devices['/dev/xvda'] = ssd_root_ebs

    interfaces = None
    if self.executor_config.get('EC2_VPC_SUBNET_ID', None) is not None:
      interface = boto.ec2.networkinterface.NetworkInterfaceSpecification(
        subnet_id=self.executor_config['EC2_VPC_SUBNET_ID'],
        groups=self.executor_config['EC2_SECURITY_GROUP_IDS'],
        associate_public_ip_address=True,
      )
      interfaces = boto.ec2.networkinterface.NetworkInterfaceCollection(interface)

    try:
      reservation = yield From(ec2_conn.run_instances(
        coreos_ami,
        instance_type=self.executor_config['EC2_INSTANCE_TYPE'],
        key_name=self.executor_config.get('EC2_KEY_NAME', None),
        user_data=user_data,
        instance_initiated_shutdown_behavior='terminate',
        block_device_map=block_devices,
        network_interfaces=interfaces,
      ))
    except boto.exception.EC2ResponseError as ec2e:
      logger.exception('Unable to spawn builder instance')
      metric_queue.ephemeral_build_worker_failure.Inc()
      raise ec2e

    if not reservation.instances:
      raise ExecutorException('Unable to spawn builder instance.')
    elif len(reservation.instances) != 1:
      raise ExecutorException('EC2 started wrong number of instances!')

    launched = AsyncWrapper(reservation.instances[0])

    # Sleep a few seconds to wait for AWS to spawn the instance.
    yield From(trollius.sleep(_TAG_RETRY_SLEEP))

    # Tag the instance with its metadata.
    for i in range(0, _TAG_RETRY_COUNT):
      try:
        yield From(launched.add_tags({
          'Name': 'Quay Ephemeral Builder',
          'Realm': realm,
          'Token': token,
          'BuildUUID': build_uuid,
        }))
      except boto.exception.EC2ResponseError as ec2e:
        if ec2e.error_code == 'InvalidInstanceID.NotFound':
          if i < _TAG_RETRY_COUNT - 1:
            logger.warning('Failed to write EC2 tags for instance %s for build %s (attempt #%s)',
                           launched.id, build_uuid, i)
            yield From(trollius.sleep(_TAG_RETRY_SLEEP))
            continue

          raise ExecutorException('Unable to find builder instance.')

        logger.exception('Failed to write EC2 tags (attempt #%s)', i)

    logger.debug('Machine with ID %s started for build %s', launched.id, build_uuid)
    raise Return(launched.id)

  @coroutine
  def stop_builder(self, builder_id):
    try:
      ec2_conn = self._get_conn()
      terminated_instances = yield From(ec2_conn.terminate_instances([builder_id]))
    except boto.exception.EC2ResponseError as ec2e:
      if ec2e.error_code == 'InvalidInstanceID.NotFound':
        logger.debug('Instance %s already terminated', builder_id)
        return

      logger.exception('Exception when trying to terminate instance %s', builder_id)
      raise

    if builder_id not in [si.id for si in terminated_instances]:
      raise ExecutorException('Unable to terminate instance: %s' % builder_id)


class PopenExecutor(BuilderExecutor):
  """ Implementation of BuilderExecutor which uses Popen to fork a quay-builder process.
  """
  def __init__(self, executor_config, manager_hostname):
    self._jobs = {}

    super(PopenExecutor, self).__init__(executor_config, manager_hostname)

  """ Executor which uses Popen to fork a quay-builder process.
  """
  @coroutine
  @duration_collector_async(metric_queue.builder_time_to_start, ['fork'])
  def start_builder(self, realm, token, build_uuid):
    # Now start a machine for this job, adding the machine id to the etcd information
    logger.debug('Forking process for build')

    ws_host = os.environ.get("BUILDMAN_WS_HOST", "localhost")
    ws_port = os.environ.get("BUILDMAN_WS_PORT", "8787")
    builder_env = {
      'TOKEN': token,
      'REALM': realm,
      'ENDPOINT': 'ws://%s:%s' % (ws_host, ws_port),
      'DOCKER_TLS_VERIFY': os.environ.get('DOCKER_TLS_VERIFY', ''),
      'DOCKER_CERT_PATH': os.environ.get('DOCKER_CERT_PATH', ''),
      'DOCKER_HOST': os.environ.get('DOCKER_HOST', ''),
      'PATH': "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
    }

    logpipe = LogPipe(logging.INFO)
    spawned = subprocess.Popen(os.environ.get('BUILDER_BINARY_LOCATION',
                                              '/usr/local/bin/quay-builder'),
                               stdout=logpipe,
                               stderr=logpipe,
                               env=builder_env)

    builder_id = str(uuid.uuid4())
    self._jobs[builder_id] = (spawned, logpipe)
    logger.debug('Builder spawned with id: %s', builder_id)
    raise Return(builder_id)

  @coroutine
  def stop_builder(self, builder_id):
    if builder_id not in self._jobs:
      raise ExecutorException('Builder id not being tracked by executor.')

    logger.debug('Killing builder with id: %s', builder_id)
    spawned, logpipe = self._jobs[builder_id]

    if spawned.poll() is None:
      spawned.kill()
    logpipe.close()


class KubernetesExecutor(BuilderExecutor):
  """ Executes build jobs by creating Kubernetes jobs which run a qemu-kvm virtual
      machine in a pod """
  def __init__(self, *args, **kwargs):
    super(KubernetesExecutor, self).__init__(*args, **kwargs)
    self._loop = get_event_loop()
    self.namespace = self.executor_config.get('BUILDER_NAMESPACE', 'builder')
    self.image = self.executor_config.get('BUILDER_VM_CONTAINER_IMAGE',
                                          'quay.io/quay/quay-builder-qemu-coreos:stable')

  @coroutine
  def _request(self, method, path, **kwargs):
    request_options = dict(kwargs)

    tls_cert = self.executor_config.get('K8S_API_TLS_CERT')
    tls_key = self.executor_config.get('K8S_API_TLS_KEY')
    tls_ca = self.executor_config.get('K8S_API_TLS_CA')
    service_account_token = self.executor_config.get('SERVICE_ACCOUNT_TOKEN')

    if 'timeout' not in request_options:
      request_options['timeout'] = self.executor_config.get("K8S_API_TIMEOUT", 20)

    if service_account_token:
      scheme = 'https'
      request_options['headers'] = {'Authorization': 'Bearer ' + service_account_token}
      logger.debug('Using service account token for Kubernetes authentication')
    elif tls_cert and tls_key:
      scheme = 'https'
      request_options['cert'] = (tls_cert, tls_key)
      logger.debug('Using tls certificate and key for Kubernetes authentication')
      if tls_ca:
        request_options['verify'] = tls_ca
    else:
      scheme = 'http'

    server = self.executor_config.get('K8S_API_SERVER', 'localhost:8080')
    url = '%s://%s%s' % (scheme, server, path)

    logger.debug('Executor config: %s', self.executor_config)
    logger.debug('Kubernetes request: %s %s: %s', method, url, request_options)
    res = requests.request(method, url, **request_options)
    logger.debug('Kubernetes response: %s: %s', res.status_code, res.text)
    raise Return(res)

  def _jobs_path(self):
    return '/apis/batch/v1/namespaces/%s/jobs' % self.namespace

  def _job_path(self, build_uuid):
    return '%s/%s' % (self._jobs_path(), build_uuid)

  def _kubernetes_distribution(self):
    return self.executor_config.get('KUBERNETES_DISTRIBUTION', 'basic').lower()

  def _is_basic_kubernetes_distribution(self):
    return self._kubernetes_distribution() == 'basic'

  def _is_openshift_kubernetes_distribution(self):
    return self._kubernetes_distribution() == 'openshift'

  def _build_job_container_resources(self):
    # Minimum acceptable free resources for this container to "fit" in a quota
    # These may be lower than the absolute limits if the cluster is knowingly
    # oversubscribed by some amount.
    container_requests = {
      'memory' : self.executor_config.get('CONTAINER_MEMORY_REQUEST', '3968Mi'),
    }

    container_limits = {
      'memory' : self.executor_config.get('CONTAINER_MEMORY_LIMITS', '5120Mi'),
      'cpu' : self.executor_config.get('CONTAINER_CPU_LIMITS', '1000m'),
    }

    resources = {
      'requests': container_requests,
    }

    if self._is_openshift_kubernetes_distribution():
      resources['requests']['cpu'] = self.executor_config.get('CONTAINER_CPU_REQUEST', '500m')
      resources['limits'] = container_limits

    return resources

  def _build_job_containers(self, user_data):
    vm_memory_limit = self.executor_config.get('VM_MEMORY_LIMIT', '4G')
    vm_volume_size = self.executor_config.get('VOLUME_SIZE', '32G')

    container = {
      'name': 'builder',
      'imagePullPolicy': 'IfNotPresent',
      'image': self.image,
      'securityContext': {'privileged': True},
      'env': [
        {'name': 'USERDATA', 'value': user_data},
        {'name': 'VM_MEMORY', 'value': vm_memory_limit},
        {'name': 'VM_VOLUME_SIZE', 'value': vm_volume_size},
      ],
      'resources': self._build_job_container_resources(),
    }

    if self._is_basic_kubernetes_distribution():
      container['volumeMounts'] = [{'name': 'secrets-mask','mountPath': '/var/run/secrets/kubernetes.io/serviceaccount'}]

    return container

  def _job_resource(self, build_uuid, user_data, coreos_channel='stable'):
    image_pull_secret_name = self.executor_config.get('IMAGE_PULL_SECRET_NAME', 'builder')
    service_account = self.executor_config.get('SERVICE_ACCOUNT_NAME', 'quay-builder-sa')
    node_selector_label_key = self.executor_config.get('NODE_SELECTOR_LABEL_KEY', 'beta.kubernetes.io/instance-type')
    node_selector_label_value = self.executor_config.get('NODE_SELECTOR_LABEL_VALUE', '')

    node_selector = {
      node_selector_label_key : node_selector_label_value
    }

    release_sha = release.GIT_HEAD or 'none'
    if ' ' in release_sha:
      release_sha = 'HEAD'

    job_resource = {
      'apiVersion': 'batch/v1',
      'kind': 'Job',
      'metadata': {
        'namespace': self.namespace,
        'generateName': build_uuid + '-',
        'labels': {
          'build': build_uuid,
          'time': datetime.datetime.now().strftime('%Y-%m-%d-%H'),
          'manager': socket.gethostname(),
          'quay-sha': release_sha,
        },
      },
      'spec' : {
        'activeDeadlineSeconds': self.executor_config.get('MAXIMUM_JOB_TIME', 7200),
        'template': {
          'metadata': {
            'labels': {
              'build': build_uuid,
              'time': datetime.datetime.now().strftime('%Y-%m-%d-%H'),
              'manager': socket.gethostname(),
              'quay-sha': release_sha,
            },
          },
          'spec': {
            'imagePullSecrets': [{ 'name': image_pull_secret_name }],
            'restartPolicy': 'Never',
            'dnsPolicy': 'Default',
            'containers': [self._build_job_containers(user_data)],
          },
        },
      },
    }

    if self._is_openshift_kubernetes_distribution():
      # Setting `automountServiceAccountToken` to false will prevent automounting API credentials for a service account.
      job_resource['spec']['template']['spec']['automountServiceAccountToken'] = False

      # Use dedicated service account that has no authorization to any resources.
      job_resource['spec']['template']['spec']['serviceAccount'] = service_account

      # Setting `enableServiceLinks` to false prevents information about other services from being injected into pod's
      # environment variables. Pod has no visibility into other services on the cluster.
      job_resource['spec']['template']['spec']['enableServiceLinks'] = False

      if node_selector_label_value.strip() != '':
        job_resource['spec']['template']['spec']['nodeSelector'] = node_selector

    if self._is_basic_kubernetes_distribution():
      # This volume is a hack to mask the token for the namespace's
      # default service account, which is placed in a file mounted under
      # `/var/run/secrets/kubernetes.io/serviceaccount` in all pods.
      # There's currently no other way to just disable the service
      # account at either the pod or namespace level.
      #
      #   https://github.com/kubernetes/kubernetes/issues/16779
      #
      job_resource['spec']['template']['spec']['volumes'] = [{'name': 'secrets-mask','emptyDir': {'medium': 'Memory'}}]

    return job_resource

  @coroutine
  @duration_collector_async(metric_queue.builder_time_to_start, ['k8s'])
  def start_builder(self, realm, token, build_uuid):
    # generate resource
    channel = self.executor_config.get('COREOS_CHANNEL', 'stable')
    user_data = self.generate_cloud_config(realm, token, build_uuid, channel, self.manager_hostname)
    resource = self._job_resource(build_uuid, user_data, channel)
    logger.debug('Using Kubernetes Distribution: %s', self._kubernetes_distribution())
    logger.debug('Generated kubernetes resource:\n%s', resource)

    # schedule
    create_job = yield From(self._request('POST', self._jobs_path(), json=resource))
    if int(create_job.status_code / 100) != 2:
      raise ExecutorException('Failed to create job: %s: %s: %s' %
                              (build_uuid, create_job.status_code, create_job.text))

    job = create_job.json()
    raise Return(job['metadata']['name'])

  @coroutine
  def stop_builder(self, builder_id):
    pods_path = '/api/v1/namespaces/%s/pods' % self.namespace

    # Delete the job itself.
    try:
      yield From(self._request('DELETE', self._job_path(builder_id)))
    except:
      logger.exception('Failed to send delete job call for job %s', builder_id)

    # Delete the pod(s) for the job.
    selectorString = "job-name=%s" % builder_id
    try:
      yield From(self._request('DELETE', pods_path, params=dict(labelSelector=selectorString)))
    except:
      logger.exception("Failed to send delete pod call for job %s", builder_id)


class LogPipe(threading.Thread):
  """ Adapted from http://codereview.stackexchange.com/a/17959
  """
  def __init__(self, level):
    """Setup the object with a logger and a loglevel
    and start the thread
    """
    threading.Thread.__init__(self)
    self.daemon = False
    self.level = level
    self.fd_read, self.fd_write = os.pipe()
    self.pipe_reader = os.fdopen(self.fd_read)
    self.start()

  def fileno(self):
    """Return the write file descriptor of the pipe
    """
    return self.fd_write

  def run(self):
    """Run the thread, logging everything.
    """
    for line in iter(self.pipe_reader.readline, ''):
      logging.log(self.level, line.strip('\n'))

    self.pipe_reader.close()

  def close(self):
    """Close the write end of the pipe.
    """
    os.close(self.fd_write)
initial import for Open Source 🎉 2019-11-12 16:09:47 +00:00			`import datetime`
			`import hashlib`
			`import logging`
			`import os`
			`import socket`
			`import subprocess`
			`import threading`
			`import uuid`

			`from functools import partial`

			`import boto.ec2`
			`import cachetools.func`
			`import requests`
			`import trollius`

			`from container_cloud_config import CloudConfigContext`
			`from jinja2 import FileSystemLoader, Environment`
			`from trollius import coroutine, From, Return, get_event_loop`

			`import release`

			`from buildman.asyncutil import AsyncWrapper`
			`from app import metric_queue, app`
			`from util.metrics.metricqueue import duration_collector_async`
			`from _init import ROOT_DIR`


			`logger = logging.getLogger(__name__)`


			`ONE_HOUR = 60*60`

			`_TAG_RETRY_COUNT = 3 # Number of times to retry adding tags.`
			`_TAG_RETRY_SLEEP = 2 # Number of seconds to wait between tag retries.`

			`ENV = Environment(loader=FileSystemLoader(os.path.join(ROOT_DIR, "buildman/templates")))`
			`TEMPLATE = ENV.get_template('cloudconfig.yaml')`
			`CloudConfigContext().populate_jinja_environment(ENV)`

			`class ExecutorException(Exception):`
			`""" Exception raised when there is a problem starting or stopping a builder.`
			`"""`
			`pass`


			`class BuilderExecutor(object):`
			`def __init__(self, executor_config, manager_hostname):`
			`""" Interface which can be plugged into the EphemeralNodeManager to provide a strategy for`
			`starting and stopping builders.`
			`"""`
			`self.executor_config = executor_config`
			`self.manager_hostname = manager_hostname`

			`default_websocket_scheme = 'wss' if app.config['PREFERRED_URL_SCHEME'] == 'https' else 'ws'`
			`self.websocket_scheme = executor_config.get("WEBSOCKET_SCHEME", default_websocket_scheme)`

			`@property`
			`def name(self):`
			`""" Name returns the unique name for this executor. """`
			`return self.executor_config.get('NAME') or self.__class__.__name__`

			`@property`
			`def setup_time(self):`
			`""" Returns the amount of time (in seconds) to wait for the execution to start for the build.`
			`If None, the manager's default will be used.`
			`"""`
			`return self.executor_config.get('SETUP_TIME')`

			`@coroutine`
			`def start_builder(self, realm, token, build_uuid):`
			`""" Create a builder with the specified config. Returns a unique id which can be used to manage`
			`the builder.`
			`"""`
			`raise NotImplementedError`

			`@coroutine`
			`def stop_builder(self, builder_id):`
			`""" Stop a builder which is currently running.`
			`"""`
			`raise NotImplementedError`

			`def allowed_for_namespace(self, namespace):`
			`""" Returns true if this executor can be used for builds in the given namespace. """`

			`# Check for an explicit namespace whitelist.`
			`namespace_whitelist = self.executor_config.get('NAMESPACE_WHITELIST')`
			`if namespace_whitelist is not None and namespace in namespace_whitelist:`
			`return True`

			`# Check for a staged rollout percentage. If found, we hash the namespace and, if it is found`
			`# in the first X% of the character space, we allow this executor to be used.`
			`staged_rollout = self.executor_config.get('STAGED_ROLLOUT')`
			`if staged_rollout is not None:`
			`bucket = int(hashlib.sha256(namespace).hexdigest()[-2:], 16)`
			`return bucket < (256 * staged_rollout)`

			`# If there are no restrictions in place, we are free to use this executor.`
			`return staged_rollout is None and namespace_whitelist is None`

			`@property`
			`def minimum_retry_threshold(self):`
			`""" Returns the minimum number of retries required for this executor to be used or 0 if`
			`none. """`
			`return self.executor_config.get('MINIMUM_RETRY_THRESHOLD', 0)`

			`def generate_cloud_config(self, realm, token, build_uuid, coreos_channel,`
			`manager_hostname, quay_username=None,`
			`quay_password=None):`
			`if quay_username is None:`
			`quay_username = self.executor_config['QUAY_USERNAME']`

			`if quay_password is None:`
			`quay_password = self.executor_config['QUAY_PASSWORD']`

			`return TEMPLATE.render(`
			`realm=realm,`
			`token=token,`
			`build_uuid=build_uuid,`
			`quay_username=quay_username,`
			`quay_password=quay_password,`
			`manager_hostname=manager_hostname,`
			`websocket_scheme=self.websocket_scheme,`
			`coreos_channel=coreos_channel,`
			`worker_image=self.executor_config.get('WORKER_IMAGE', 'quay.io/coreos/registry-build-worker'),`
			`worker_tag=self.executor_config['WORKER_TAG'],`
			`logentries_token=self.executor_config.get('LOGENTRIES_TOKEN', None),`
			`volume_size=self.executor_config.get('VOLUME_SIZE', '42G'),`
			`max_lifetime_s=self.executor_config.get('MAX_LIFETIME_S', 10800),`
			`ssh_authorized_keys=self.executor_config.get('SSH_AUTHORIZED_KEYS', []),`
			`)`


			`class EC2Executor(BuilderExecutor):`
			`""" Implementation of BuilderExecutor which uses libcloud to start machines on a variety of cloud`
			`providers.`
			`"""`
			`COREOS_STACK_URL = 'http://%s.release.core-os.net/amd64-usr/current/coreos_production_ami_hvm.txt'`

			`def __init__(self, args, *kwargs):`
			`self._loop = get_event_loop()`
			`super(EC2Executor, self).__init__(args, *kwargs)`

			`def _get_conn(self):`
			`""" Creates an ec2 connection which can be used to manage instances.`
			`"""`
			`return AsyncWrapper(boto.ec2.connect_to_region(`
			`self.executor_config['EC2_REGION'],`
			`aws_access_key_id=self.executor_config['AWS_ACCESS_KEY'],`
			`aws_secret_access_key=self.executor_config['AWS_SECRET_KEY'],`
			`))`

			`@classmethod`
			`@cachetools.func.ttl_cache(ttl=ONE_HOUR)`
			`def _get_coreos_ami(cls, ec2_region, coreos_channel):`
			`""" Retrieve the CoreOS AMI id from the canonical listing.`
			`"""`
			`stack_list_string = requests.get(EC2Executor.COREOS_STACK_URL % coreos_channel).text`
			`stack_amis = dict([stack.split('=') for stack in stack_list_string.split('\|')])`
			`return stack_amis[ec2_region]`

			`@coroutine`
			`@duration_collector_async(metric_queue.builder_time_to_start, ['ec2'])`
			`def start_builder(self, realm, token, build_uuid):`
			`region = self.executor_config['EC2_REGION']`
			`channel = self.executor_config.get('COREOS_CHANNEL', 'stable')`

			`coreos_ami = self.executor_config.get('COREOS_AMI', None)`
			`if coreos_ami is None:`
			`get_ami_callable = partial(self._get_coreos_ami, region, channel)`
			`coreos_ami = yield From(self._loop.run_in_executor(None, get_ami_callable))`

			`user_data = self.generate_cloud_config(realm, token, build_uuid, channel, self.manager_hostname)`
			`logger.debug('Generated cloud config for build %s: %s', build_uuid, user_data)`

			`ec2_conn = self._get_conn()`

			`ssd_root_ebs = boto.ec2.blockdevicemapping.BlockDeviceType(`
			`size=int(self.executor_config.get('BLOCK_DEVICE_SIZE', 48)),`
			`volume_type='gp2',`
			`delete_on_termination=True,`
			`)`
			`block_devices = boto.ec2.blockdevicemapping.BlockDeviceMapping()`
			`block_devices['/dev/xvda'] = ssd_root_ebs`

			`interfaces = None`
			`if self.executor_config.get('EC2_VPC_SUBNET_ID', None) is not None:`
			`interface = boto.ec2.networkinterface.NetworkInterfaceSpecification(`
			`subnet_id=self.executor_config['EC2_VPC_SUBNET_ID'],`
			`groups=self.executor_config['EC2_SECURITY_GROUP_IDS'],`
			`associate_public_ip_address=True,`
			`)`
			`interfaces = boto.ec2.networkinterface.NetworkInterfaceCollection(interface)`

			`try:`
			`reservation = yield From(ec2_conn.run_instances(`
			`coreos_ami,`
			`instance_type=self.executor_config['EC2_INSTANCE_TYPE'],`
			`key_name=self.executor_config.get('EC2_KEY_NAME', None),`
			`user_data=user_data,`
			`instance_initiated_shutdown_behavior='terminate',`
			`block_device_map=block_devices,`
			`network_interfaces=interfaces,`
			`))`
			`except boto.exception.EC2ResponseError as ec2e:`
			`logger.exception('Unable to spawn builder instance')`
			`metric_queue.ephemeral_build_worker_failure.Inc()`
			`raise ec2e`

			`if not reservation.instances:`
			`raise ExecutorException('Unable to spawn builder instance.')`
			`elif len(reservation.instances) != 1:`
			`raise ExecutorException('EC2 started wrong number of instances!')`

			`launched = AsyncWrapper(reservation.instances[0])`

			`# Sleep a few seconds to wait for AWS to spawn the instance.`
			`yield From(trollius.sleep(_TAG_RETRY_SLEEP))`

			`# Tag the instance with its metadata.`
			`for i in range(0, _TAG_RETRY_COUNT):`
			`try:`
			`yield From(launched.add_tags({`
			`'Name': 'Quay Ephemeral Builder',`
			`'Realm': realm,`
			`'Token': token,`
			`'BuildUUID': build_uuid,`
			`}))`
			`except boto.exception.EC2ResponseError as ec2e:`
			`if ec2e.error_code == 'InvalidInstanceID.NotFound':`
			`if i < _TAG_RETRY_COUNT - 1:`
			`logger.warning('Failed to write EC2 tags for instance %s for build %s (attempt #%s)',`
			`launched.id, build_uuid, i)`
			`yield From(trollius.sleep(_TAG_RETRY_SLEEP))`
			`continue`

			`raise ExecutorException('Unable to find builder instance.')`

			`logger.exception('Failed to write EC2 tags (attempt #%s)', i)`

			`logger.debug('Machine with ID %s started for build %s', launched.id, build_uuid)`
			`raise Return(launched.id)`

			`@coroutine`
			`def stop_builder(self, builder_id):`
			`try:`
			`ec2_conn = self._get_conn()`
			`terminated_instances = yield From(ec2_conn.terminate_instances([builder_id]))`
			`except boto.exception.EC2ResponseError as ec2e:`
			`if ec2e.error_code == 'InvalidInstanceID.NotFound':`
			`logger.debug('Instance %s already terminated', builder_id)`
			`return`

			`logger.exception('Exception when trying to terminate instance %s', builder_id)`
			`raise`

			`if builder_id not in [si.id for si in terminated_instances]:`
			`raise ExecutorException('Unable to terminate instance: %s' % builder_id)`


			`class PopenExecutor(BuilderExecutor):`
			`""" Implementation of BuilderExecutor which uses Popen to fork a quay-builder process.`
			`"""`
			`def __init__(self, executor_config, manager_hostname):`
			`self._jobs = {}`

			`super(PopenExecutor, self).__init__(executor_config, manager_hostname)`

			`""" Executor which uses Popen to fork a quay-builder process.`
			`"""`
			`@coroutine`
			`@duration_collector_async(metric_queue.builder_time_to_start, ['fork'])`
			`def start_builder(self, realm, token, build_uuid):`
			`# Now start a machine for this job, adding the machine id to the etcd information`
			`logger.debug('Forking process for build')`

			`ws_host = os.environ.get("BUILDMAN_WS_HOST", "localhost")`
			`ws_port = os.environ.get("BUILDMAN_WS_PORT", "8787")`
			`builder_env = {`
			`'TOKEN': token,`
			`'REALM': realm,`
			`'ENDPOINT': 'ws://%s:%s' % (ws_host, ws_port),`
			`'DOCKER_TLS_VERIFY': os.environ.get('DOCKER_TLS_VERIFY', ''),`
			`'DOCKER_CERT_PATH': os.environ.get('DOCKER_CERT_PATH', ''),`
			`'DOCKER_HOST': os.environ.get('DOCKER_HOST', ''),`
			`'PATH': "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"`
			`}`

			`logpipe = LogPipe(logging.INFO)`
			`spawned = subprocess.Popen(os.environ.get('BUILDER_BINARY_LOCATION',`
			`'/usr/local/bin/quay-builder'),`
			`stdout=logpipe,`
			`stderr=logpipe,`
			`env=builder_env)`

			`builder_id = str(uuid.uuid4())`
			`self._jobs[builder_id] = (spawned, logpipe)`
			`logger.debug('Builder spawned with id: %s', builder_id)`
			`raise Return(builder_id)`

			`@coroutine`
			`def stop_builder(self, builder_id):`
			`if builder_id not in self._jobs:`
			`raise ExecutorException('Builder id not being tracked by executor.')`

			`logger.debug('Killing builder with id: %s', builder_id)`
			`spawned, logpipe = self._jobs[builder_id]`

			`if spawned.poll() is None:`
			`spawned.kill()`
			`logpipe.close()`


			`class KubernetesExecutor(BuilderExecutor):`
			`""" Executes build jobs by creating Kubernetes jobs which run a qemu-kvm virtual`
			`machine in a pod """`
			`def __init__(self, args, *kwargs):`
			`super(KubernetesExecutor, self).__init__(args, *kwargs)`
			`self._loop = get_event_loop()`
			`self.namespace = self.executor_config.get('BUILDER_NAMESPACE', 'builder')`
			`self.image = self.executor_config.get('BUILDER_VM_CONTAINER_IMAGE',`
			`'quay.io/quay/quay-builder-qemu-coreos:stable')`

			`@coroutine`
			`def _request(self, method, path, **kwargs):`
			`request_options = dict(kwargs)`

			`tls_cert = self.executor_config.get('K8S_API_TLS_CERT')`
			`tls_key = self.executor_config.get('K8S_API_TLS_KEY')`
			`tls_ca = self.executor_config.get('K8S_API_TLS_CA')`
			`service_account_token = self.executor_config.get('SERVICE_ACCOUNT_TOKEN')`

			`if 'timeout' not in request_options:`
			`request_options['timeout'] = self.executor_config.get("K8S_API_TIMEOUT", 20)`

			`if service_account_token:`
			`scheme = 'https'`
			`request_options['headers'] = {'Authorization': 'Bearer ' + service_account_token}`
			`logger.debug('Using service account token for Kubernetes authentication')`
			`elif tls_cert and tls_key:`
			`scheme = 'https'`
			`request_options['cert'] = (tls_cert, tls_key)`
			`logger.debug('Using tls certificate and key for Kubernetes authentication')`
			`if tls_ca:`
			`request_options['verify'] = tls_ca`
			`else:`
			`scheme = 'http'`

			`server = self.executor_config.get('K8S_API_SERVER', 'localhost:8080')`
			`url = '%s://%s%s' % (scheme, server, path)`

			`logger.debug('Executor config: %s', self.executor_config)`
			`logger.debug('Kubernetes request: %s %s: %s', method, url, request_options)`
			`res = requests.request(method, url, **request_options)`
			`logger.debug('Kubernetes response: %s: %s', res.status_code, res.text)`
			`raise Return(res)`

			`def _jobs_path(self):`
			`return '/apis/batch/v1/namespaces/%s/jobs' % self.namespace`

			`def _job_path(self, build_uuid):`
			`return '%s/%s' % (self._jobs_path(), build_uuid)`

			`def _kubernetes_distribution(self):`
			`return self.executor_config.get('KUBERNETES_DISTRIBUTION', 'basic').lower()`

			`def _is_basic_kubernetes_distribution(self):`
			`return self._kubernetes_distribution() == 'basic'`

			`def _is_openshift_kubernetes_distribution(self):`
			`return self._kubernetes_distribution() == 'openshift'`

			`def _build_job_container_resources(self):`
			`# Minimum acceptable free resources for this container to "fit" in a quota`
			`# These may be lower than the absolute limits if the cluster is knowingly`
			`# oversubscribed by some amount.`
			`container_requests = {`
			`'memory' : self.executor_config.get('CONTAINER_MEMORY_REQUEST', '3968Mi'),`
			`}`

			`container_limits = {`
			`'memory' : self.executor_config.get('CONTAINER_MEMORY_LIMITS', '5120Mi'),`
			`'cpu' : self.executor_config.get('CONTAINER_CPU_LIMITS', '1000m'),`
			`}`

			`resources = {`
			`'requests': container_requests,`
			`}`

			`if self._is_openshift_kubernetes_distribution():`
			`resources['requests']['cpu'] = self.executor_config.get('CONTAINER_CPU_REQUEST', '500m')`
			`resources['limits'] = container_limits`

			`return resources`

			`def _build_job_containers(self, user_data):`
			`vm_memory_limit = self.executor_config.get('VM_MEMORY_LIMIT', '4G')`
			`vm_volume_size = self.executor_config.get('VOLUME_SIZE', '32G')`

			`container = {`
			`'name': 'builder',`
			`'imagePullPolicy': 'IfNotPresent',`
			`'image': self.image,`
			`'securityContext': {'privileged': True},`
			`'env': [`
			`{'name': 'USERDATA', 'value': user_data},`
			`{'name': 'VM_MEMORY', 'value': vm_memory_limit},`
			`{'name': 'VM_VOLUME_SIZE', 'value': vm_volume_size},`
			`],`
			`'resources': self._build_job_container_resources(),`
			`}`

			`if self._is_basic_kubernetes_distribution():`
			`container['volumeMounts'] = [{'name': 'secrets-mask','mountPath': '/var/run/secrets/kubernetes.io/serviceaccount'}]`

			`return container`

			`def _job_resource(self, build_uuid, user_data, coreos_channel='stable'):`
			`image_pull_secret_name = self.executor_config.get('IMAGE_PULL_SECRET_NAME', 'builder')`
			`service_account = self.executor_config.get('SERVICE_ACCOUNT_NAME', 'quay-builder-sa')`
			`node_selector_label_key = self.executor_config.get('NODE_SELECTOR_LABEL_KEY', 'beta.kubernetes.io/instance-type')`
			`node_selector_label_value = self.executor_config.get('NODE_SELECTOR_LABEL_VALUE', '')`

			`node_selector = {`
			`node_selector_label_key : node_selector_label_value`
			`}`

			`release_sha = release.GIT_HEAD or 'none'`
			`if ' ' in release_sha:`
			`release_sha = 'HEAD'`

			`job_resource = {`
			`'apiVersion': 'batch/v1',`
			`'kind': 'Job',`
			`'metadata': {`
			`'namespace': self.namespace,`
			`'generateName': build_uuid + '-',`
			`'labels': {`
			`'build': build_uuid,`
			`'time': datetime.datetime.now().strftime('%Y-%m-%d-%H'),`
			`'manager': socket.gethostname(),`
			`'quay-sha': release_sha,`
			`},`
			`},`
			`'spec' : {`
			`'activeDeadlineSeconds': self.executor_config.get('MAXIMUM_JOB_TIME', 7200),`
			`'template': {`
			`'metadata': {`
			`'labels': {`
			`'build': build_uuid,`
			`'time': datetime.datetime.now().strftime('%Y-%m-%d-%H'),`
			`'manager': socket.gethostname(),`
			`'quay-sha': release_sha,`
			`},`
			`},`
			`'spec': {`
			`'imagePullSecrets': [{ 'name': image_pull_secret_name }],`
			`'restartPolicy': 'Never',`
			`'dnsPolicy': 'Default',`
			`'containers': [self._build_job_containers(user_data)],`
			`},`
			`},`
			`},`
			`}`

			`if self._is_openshift_kubernetes_distribution():`
			# Setting `automountServiceAccountToken` to false will prevent automounting API credentials for a service account.
			`job_resource['spec']['template']['spec']['automountServiceAccountToken'] = False`

			`# Use dedicated service account that has no authorization to any resources.`
			`job_resource['spec']['template']['spec']['serviceAccount'] = service_account`

			# Setting `enableServiceLinks` to false prevents information about other services from being injected into pod's
			`# environment variables. Pod has no visibility into other services on the cluster.`
			`job_resource['spec']['template']['spec']['enableServiceLinks'] = False`

			`if node_selector_label_value.strip() != '':`
			`job_resource['spec']['template']['spec']['nodeSelector'] = node_selector`

			`if self._is_basic_kubernetes_distribution():`
			`# This volume is a hack to mask the token for the namespace's`
			`# default service account, which is placed in a file mounted under`
			# `/var/run/secrets/kubernetes.io/serviceaccount` in all pods.
			`# There's currently no other way to just disable the service`
			`# account at either the pod or namespace level.`
			`#`
			`# https://github.com/kubernetes/kubernetes/issues/16779`
			`#`
			`job_resource['spec']['template']['spec']['volumes'] = [{'name': 'secrets-mask','emptyDir': {'medium': 'Memory'}}]`

			`return job_resource`

			`@coroutine`
			`@duration_collector_async(metric_queue.builder_time_to_start, ['k8s'])`
			`def start_builder(self, realm, token, build_uuid):`
			`# generate resource`
			`channel = self.executor_config.get('COREOS_CHANNEL', 'stable')`
			`user_data = self.generate_cloud_config(realm, token, build_uuid, channel, self.manager_hostname)`
			`resource = self._job_resource(build_uuid, user_data, channel)`
			`logger.debug('Using Kubernetes Distribution: %s', self._kubernetes_distribution())`
			`logger.debug('Generated kubernetes resource:\n%s', resource)`

			`# schedule`
			`create_job = yield From(self._request('POST', self._jobs_path(), json=resource))`
			`if int(create_job.status_code / 100) != 2:`
			`raise ExecutorException('Failed to create job: %s: %s: %s' %`
			`(build_uuid, create_job.status_code, create_job.text))`

			`job = create_job.json()`
			`raise Return(job['metadata']['name'])`

			`@coroutine`
			`def stop_builder(self, builder_id):`
			`pods_path = '/api/v1/namespaces/%s/pods' % self.namespace`

			`# Delete the job itself.`
			`try:`
			`yield From(self._request('DELETE', self._job_path(builder_id)))`
			`except:`
			`logger.exception('Failed to send delete job call for job %s', builder_id)`

			`# Delete the pod(s) for the job.`
			`selectorString = "job-name=%s" % builder_id`
			`try:`
			`yield From(self._request('DELETE', pods_path, params=dict(labelSelector=selectorString)))`
			`except:`
			`logger.exception("Failed to send delete pod call for job %s", builder_id)`


			`class LogPipe(threading.Thread):`
			`""" Adapted from http://codereview.stackexchange.com/a/17959`
			`"""`
			`def __init__(self, level):`
			`"""Setup the object with a logger and a loglevel`
			`and start the thread`
			`"""`
			`threading.Thread.__init__(self)`
			`self.daemon = False`
			`self.level = level`
			`self.fd_read, self.fd_write = os.pipe()`
			`self.pipe_reader = os.fdopen(self.fd_read)`
			`self.start()`

			`def fileno(self):`
			`"""Return the write file descriptor of the pipe`
			`"""`
			`return self.fd_write`

			`def run(self):`
			`"""Run the thread, logging everything.`
			`"""`
			`for line in iter(self.pipe_reader.readline, ''):`
			`logging.log(self.level, line.strip('\n'))`

			`self.pipe_reader.close()`

			`def close(self):`
			`"""Close the write end of the pipe.`
			`"""`
			`os.close(self.fd_write)`