Switch a few of the buildman methods to coroutines in order to support network calls in methods. Add a test for the ephemeral build manager.

This commit is contained in:
Jake Moshenko 2014-12-22 12:14:16 -05:00
parent a280bbcb6d
commit 12ee8e0fc0
11 changed files with 233 additions and 52 deletions

View file

@ -1,12 +1,15 @@
import logging
import etcd
import uuid
import calendar
from datetime import datetime, timedelta
from trollius import From, coroutine, Return
from buildman.manager.basemanager import BaseManager
from buildman.manager.executor import PopenExecutor, EC2Executor
from buildman.component.buildcomponent import BuildComponent
from buildman.asyncutil import AsyncWrapper
logger = logging.getLogger(__name__)
@ -32,6 +35,13 @@ class EphemeralBuilderManager(BaseManager):
""" Build manager implementation for the Enterprise Registry. """
shutting_down = False
_executors = {
'popen': PopenExecutor,
'ec2': EC2Executor,
}
_etcd_client_klass = etcd.Client
def __init__(self, *args, **kwargs):
self._manager_config = None
self._etcd_client = None
@ -39,10 +49,6 @@ class EphemeralBuilderManager(BaseManager):
self._component_to_job = {}
self._component_to_builder = {}
self._executors = {
'popen': PopenExecutor,
'ec2': EC2Executor,
}
self._executor = None
super(EphemeralBuilderManager, self).__init__(*args, **kwargs)
@ -58,9 +64,8 @@ class EphemeralBuilderManager(BaseManager):
etcd_host = self._manager_config.get('ETCD_HOST', '127.0.0.1')
etcd_port = self._manager_config.get('ETCD_PORT', 2379)
logger.debug('Connecting to etcd on %s:%s', etcd_host, etcd_port)
self._etcd_client = etcd.Client(host=etcd_host, port=etcd_port)
clear_etcd(self._etcd_client)
self._etcd_client = AsyncWrapper(self._etcd_client_klass(host=etcd_host, port=etcd_port))
def setup_time(self):
setup_time = self._manager_config.get('MACHINE_SETUP_TIME', 300)
@ -71,13 +76,14 @@ class EphemeralBuilderManager(BaseManager):
logger.debug('Calling shutdown.')
raise NotImplementedError
@coroutine
def schedule(self, build_job, loop):
logger.debug('Calling schedule with job: %s', build_job.repo_build.uuid)
logger.debug('Calling schedule with job: %s', build_job.job_details['build_uuid'])
# Check if there are worker slots avialable by checking the number of jobs in etcd
allowed_worker_count = self._manager_config.get('ALLOWED_WORKER_COUNT', 2)
allowed_worker_count = self._manager_config.get('ALLOWED_WORKER_COUNT', 1)
try:
building = self._etcd_client.read(ETCD_BUILDER_PREFIX, recursive=True)
building = yield From(self._etcd_client.read(ETCD_BUILDER_PREFIX, recursive=True))
workers_alive = sum(1 for child in building.children if not child.dir)
except KeyError:
workers_alive = 0
@ -87,7 +93,7 @@ class EphemeralBuilderManager(BaseManager):
if workers_alive >= allowed_worker_count:
logger.info('Too many workers alive, unable to start new worker. %s >= %s', workers_alive,
allowed_worker_count)
return False
raise Return(False)
job_key = self._etcd_job_key(build_job)
@ -97,28 +103,33 @@ class EphemeralBuilderManager(BaseManager):
expiration = datetime.utcnow() + timedelta(seconds=self.setup_time())
payload = {
'expiration': expiration.isoformat(),
'expiration': calendar.timegm(expiration.timetuple()),
}
try:
self._etcd_client.write(job_key, payload, prevExist=False)
yield From(self._etcd_client.write(job_key, payload, prevExist=False))
component = self.register_component(realm, BuildComponent, token=token)
self._component_to_job[component] = build_job
except KeyError:
# The job was already taken by someone else, we are probably a retry
logger.warning('Job already exists in etcd, did an old worker die?')
return False
logger.error('Job already exists in etcd, are timeouts misconfigured or is the queue broken?')
raise Return(False)
builder_id = self._executor.start_builder(realm, token)
logger.debug('Starting builder with executor: %s', self._executor)
builder_id = yield From(self._executor.start_builder(realm, token))
self._component_to_builder[component] = builder_id
return True
# Store the builder in etcd associated with the job id
payload['builder_id'] = builder_id
yield From(self._etcd_client.write(job_key, payload, prevExist=True))
raise Return(True)
def build_component_ready(self, build_component, loop):
try:
job = self._component_to_job.pop(build_component)
logger.debug('Sending build %s to newly ready component on realm %s', job.repo_build.uuid,
build_component.builder_realm)
logger.debug('Sending build %s to newly ready component on realm %s',
job.job_details['build_uuid'], build_component.builder_realm)
loop.call_soon(build_component.start_build, job)
except KeyError:
logger.warning('Builder is asking for more work, but work already completed')
@ -126,6 +137,7 @@ class EphemeralBuilderManager(BaseManager):
def build_component_disposed(self, build_component, timed_out):
logger.debug('Calling build_component_disposed.')
@coroutine
def job_completed(self, build_job, job_status, build_component):
logger.debug('Calling job_completed with status: %s', job_status)
@ -134,12 +146,24 @@ class EphemeralBuilderManager(BaseManager):
# Release the lock in etcd
job_key = self._etcd_job_key(build_job)
self._etcd_client.delete(job_key)
yield From(self._etcd_client.delete(job_key))
self.job_complete_callback(build_job, job_status)
@coroutine
def _clean_up_old_builder(self, job_key, job_payload):
""" Terminate an old builders once the expiration date has passed.
"""
logger.debug('Cleaning up the old builder for job: %s', job_key)
if 'builder_id' in job_payload:
logger.info('Terminating expired build node.')
yield From(self._executor.stop_builder(job_payload['builder_id']))
yield From(self._etcd_client.delete(job_key))
@staticmethod
def _etcd_job_key(build_job):
""" Create a key which is used to track a job in etcd.
"""
return '{0}{1}'.format(ETCD_BUILDER_PREFIX, build_job.repo_build.uuid)
return '{0}{1}'.format(ETCD_BUILDER_PREFIX, build_job.job_details['build_uuid'])