Switch several of the buildman methods to coroutines so that they can make non-blocking network calls. Add a test for the ephemeral build manager.
parent a280bbcb6d
commit 12ee8e0fc0
11 changed files with 233 additions and 52 deletions
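For readers unfamiliar with trollius (the Python 2 backport of asyncio used throughout this diff): @coroutine marks a generator-based coroutine, `yield From(...)` stands in for `yield from`, and `raise Return(...)` stands in for `return value`, which a Python 2 generator cannot use. A minimal sketch of the pattern, with trollius.sleep standing in for a real network call:

# Minimal sketch of the trollius conventions this commit adopts.
from trollius import coroutine, From, Return, get_event_loop, sleep

@coroutine
def fetch_status():
  yield From(sleep(0.1))    # await a future or coroutine
  raise Return('complete')  # the coroutine's "return value"

loop = get_event_loop()
print(loop.run_until_complete(fetch_status()))  # prints: complete
loop.close()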
@@ -1,3 +1,5 @@
+from trollius import coroutine
+
 class BaseManager(object):
   """ Base for all worker managers. """
   def __init__(self, register_component, unregister_component, job_heartbeat_callback,
@@ -26,6 +28,7 @@ class BaseManager(object):
     """
     raise NotImplementedError

+  @coroutine
   def schedule(self, build_job, loop):
     """ Schedules a queue item to be built. Returns True if the item was properly scheduled
         and False if all workers are busy.
@@ -48,8 +51,11 @@ class BaseManager(object):
     """
     raise NotImplementedError

+  @coroutine
   def job_completed(self, build_job, job_status, build_component):
     """ Method invoked once a job_item has completed, in some manner. The job_status will be
-        one of: incomplete, error, complete. If incomplete, the job should be requeued.
+        one of: incomplete, error, complete. Implementations of this method should call
+        self.job_complete_callback with a status of Incomplete if they wish for the job to be
+        automatically requeued.
     """
     raise NotImplementedError
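With this interface change, every BaseManager subclass must implement schedule and job_completed as coroutines. A hypothetical no-op subclass (class name and bodies are illustrative, not part of the commit) showing the shape implementations now take:

# Hypothetical subclass sketch of the new coroutine interface.
from trollius import coroutine, Return

from buildman.manager.basemanager import BaseManager

class NoopManager(BaseManager):
  @coroutine
  def schedule(self, build_job, loop):
    raise Return(False)  # report "all workers are busy"

  @coroutine
  def job_completed(self, build_job, job_status, build_component):
    self.job_complete_callback(build_job, job_status)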
@@ -5,7 +5,7 @@ from buildman.component.basecomponent import BaseComponent
 from buildman.component.buildcomponent import BuildComponent
 from buildman.manager.basemanager import BaseManager

-from trollius.coroutines import From
+from trollius.coroutines import From, Return, coroutine

 REGISTRATION_REALM = 'registration'
 logger = logging.getLogger(__name__)
@@ -50,14 +50,15 @@ class EnterpriseManager(BaseManager):
     self.register_component(realm, BuildComponent, token="")
     return realm

+  @coroutine
   def schedule(self, build_job, loop):
     """ Schedules a build for an Enterprise Registry. """
     if self.shutting_down or not self.ready_components:
-      return False
+      raise Return(False)

     component = self.ready_components.pop()
     loop.call_soon(component.start_build, build_job)
-    return True
+    raise Return(True)

   def build_component_ready(self, build_component, loop):
     self.ready_components.add(build_component)
@@ -65,6 +66,7 @@ class EnterpriseManager(BaseManager):
   def shutdown(self):
     self.shutting_down = True

+  @coroutine
   def job_completed(self, build_job, job_status, build_component):
     self.job_complete_callback(build_job, job_status)
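On the caller's side, schedule can no longer be invoked as a plain method; its result must be surfaced with yield From inside another coroutine (or driven with loop.run_until_complete). A sketch, assuming a manager instance and build_job are in scope:

# Sketch of the caller side after this change.
from trollius import coroutine, From, Return

@coroutine
def try_schedule(manager, build_job, loop):
  scheduled = yield From(manager.schedule(build_job, loop))
  raise Return(scheduled)  # True if a worker took the job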
@@ -1,12 +1,15 @@
 import logging
 import etcd
 import uuid
+import calendar

 from datetime import datetime, timedelta
+from trollius import From, coroutine, Return

 from buildman.manager.basemanager import BaseManager
 from buildman.manager.executor import PopenExecutor, EC2Executor
 from buildman.component.buildcomponent import BuildComponent
+from buildman.asyncutil import AsyncWrapper


 logger = logging.getLogger(__name__)
@@ -32,6 +35,13 @@ class EphemeralBuilderManager(BaseManager):
   """ Build manager implementation for the Enterprise Registry. """
   shutting_down = False

+  _executors = {
+    'popen': PopenExecutor,
+    'ec2': EC2Executor,
+  }
+
+  _etcd_client_klass = etcd.Client
+
   def __init__(self, *args, **kwargs):
     self._manager_config = None
     self._etcd_client = None
@@ -39,10 +49,6 @@ class EphemeralBuilderManager(BaseManager):
     self._component_to_job = {}
     self._component_to_builder = {}

-    self._executors = {
-      'popen': PopenExecutor,
-      'ec2': EC2Executor,
-    }
     self._executor = None

     super(EphemeralBuilderManager, self).__init__(*args, **kwargs)
@@ -58,9 +64,8 @@ class EphemeralBuilderManager(BaseManager):
     etcd_host = self._manager_config.get('ETCD_HOST', '127.0.0.1')
     etcd_port = self._manager_config.get('ETCD_PORT', 2379)
     logger.debug('Connecting to etcd on %s:%s', etcd_host, etcd_port)
-    self._etcd_client = etcd.Client(host=etcd_host, port=etcd_port)

-    clear_etcd(self._etcd_client)
+    self._etcd_client = AsyncWrapper(self._etcd_client_klass(host=etcd_host, port=etcd_port))

   def setup_time(self):
     setup_time = self._manager_config.get('MACHINE_SETUP_TIME', 300)
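buildman/asyncutil.py itself is not part of this excerpt. A plausible minimal AsyncWrapper, assuming it proxies attribute access to the wrapped client and pushes each blocking method call onto the event loop's thread-pool executor so the result can be awaited with yield From:

# Plausible sketch of AsyncWrapper; the real implementation is not shown
# in this diff and may differ.
from functools import partial
from trollius import get_event_loop

class AsyncWrapper(object):
  """ Wrap a blocking client so its method calls return awaitable futures. """
  def __init__(self, delegate, loop=None, executor=None):
    self._delegate = delegate
    self._loop = loop or get_event_loop()
    self._executor = executor  # None selects the default thread pool

  def __getattr__(self, attrib):
    delegate_attr = getattr(self._delegate, attrib)
    if not callable(delegate_attr):
      return delegate_attr

    def wrapper(*args, **kwargs):
      # Returns a future; callers do `result = yield From(client.read(...))`.
      return self._loop.run_in_executor(self._executor, partial(delegate_attr, *args, **kwargs))
    return wrapper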
@@ -71,13 +76,14 @@ class EphemeralBuilderManager(BaseManager):
     logger.debug('Calling shutdown.')
     raise NotImplementedError

+  @coroutine
   def schedule(self, build_job, loop):
-    logger.debug('Calling schedule with job: %s', build_job.repo_build.uuid)
+    logger.debug('Calling schedule with job: %s', build_job.job_details['build_uuid'])

     # Check if there are worker slots available by checking the number of jobs in etcd
-    allowed_worker_count = self._manager_config.get('ALLOWED_WORKER_COUNT', 2)
+    allowed_worker_count = self._manager_config.get('ALLOWED_WORKER_COUNT', 1)
     try:
-      building = self._etcd_client.read(ETCD_BUILDER_PREFIX, recursive=True)
+      building = yield From(self._etcd_client.read(ETCD_BUILDER_PREFIX, recursive=True))
       workers_alive = sum(1 for child in building.children if not child.dir)
     except KeyError:
       workers_alive = 0
@@ -87,7 +93,7 @@ class EphemeralBuilderManager(BaseManager):
     if workers_alive >= allowed_worker_count:
       logger.info('Too many workers alive, unable to start new worker. %s >= %s', workers_alive,
                   allowed_worker_count)
-      return False
+      raise Return(False)

     job_key = self._etcd_job_key(build_job)
@@ -97,28 +103,33 @@ class EphemeralBuilderManager(BaseManager):
     expiration = datetime.utcnow() + timedelta(seconds=self.setup_time())

     payload = {
-      'expiration': expiration.isoformat(),
+      'expiration': calendar.timegm(expiration.timetuple()),
     }

     try:
-      self._etcd_client.write(job_key, payload, prevExist=False)
+      yield From(self._etcd_client.write(job_key, payload, prevExist=False))
       component = self.register_component(realm, BuildComponent, token=token)
       self._component_to_job[component] = build_job
     except KeyError:
       # The job was already taken by someone else, we are probably a retry
-      logger.warning('Job already exists in etcd, did an old worker die?')
-      return False
+      logger.error('Job already exists in etcd, are timeouts misconfigured or is the queue broken?')
+      raise Return(False)

-    builder_id = self._executor.start_builder(realm, token)
+    logger.debug('Starting builder with executor: %s', self._executor)
+    builder_id = yield From(self._executor.start_builder(realm, token))
     self._component_to_builder[component] = builder_id

-    return True
+    # Store the builder in etcd associated with the job id
+    payload['builder_id'] = builder_id
+    yield From(self._etcd_client.write(job_key, payload, prevExist=True))
+
+    raise Return(True)

   def build_component_ready(self, build_component, loop):
     try:
       job = self._component_to_job.pop(build_component)
-      logger.debug('Sending build %s to newly ready component on realm %s', job.repo_build.uuid,
-                   build_component.builder_realm)
+      logger.debug('Sending build %s to newly ready component on realm %s',
+                   job.job_details['build_uuid'], build_component.builder_realm)
       loop.call_soon(build_component.start_build, job)
     except KeyError:
       logger.warning('Builder is asking for more work, but work already completed')
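The prevExist=False flag makes the first write an atomic create-only operation, which is what lets the job key double as a distributed lock; the follow-up write with prevExist=True then asserts the key still exists when the builder id is recorded. The acquire step in isolation (exception type taken from the handler above):

# Rough illustration of the create-only etcd write used as a job lock.
from trollius import coroutine, From, Return

@coroutine
def try_acquire_job(etcd_client, job_key, payload):
  try:
    yield From(etcd_client.write(job_key, payload, prevExist=False))
    raise Return(True)   # we created the key, so the job is ours
  except KeyError:
    raise Return(False)  # another manager already claimed this job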
@@ -126,6 +137,7 @@ class EphemeralBuilderManager(BaseManager):
   def build_component_disposed(self, build_component, timed_out):
     logger.debug('Calling build_component_disposed.')

+  @coroutine
   def job_completed(self, build_job, job_status, build_component):
     logger.debug('Calling job_completed with status: %s', job_status)

@@ -134,12 +146,24 @@ class EphemeralBuilderManager(BaseManager):

     # Release the lock in etcd
     job_key = self._etcd_job_key(build_job)
-    self._etcd_client.delete(job_key)
+    yield From(self._etcd_client.delete(job_key))

     self.job_complete_callback(build_job, job_status)

+  @coroutine
+  def _clean_up_old_builder(self, job_key, job_payload):
+    """ Terminate an old builder once the expiration date has passed.
+    """
+    logger.debug('Cleaning up the old builder for job: %s', job_key)
+    if 'builder_id' in job_payload:
+      logger.info('Terminating expired build node.')
+      yield From(self._executor.stop_builder(job_payload['builder_id']))
+
+    yield From(self._etcd_client.delete(job_key))
+
   @staticmethod
   def _etcd_job_key(build_job):
     """ Create a key which is used to track a job in etcd.
     """
-    return '{0}{1}'.format(ETCD_BUILDER_PREFIX, build_job.repo_build.uuid)
+    return '{0}{1}'.format(ETCD_BUILDER_PREFIX, build_job.job_details['build_uuid'])
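The expiration format change stores a UTC epoch integer rather than an ISO 8601 string, making expiry comparisons simple integer arithmetic. Note that calendar.timegm interprets the struct_time as UTC, unlike time.mktime, which assumes local time:

# The epoch conversion used for the 'expiration' payload field.
import calendar
from datetime import datetime, timedelta

expiration = datetime.utcnow() + timedelta(seconds=300)
print(calendar.timegm(expiration.timetuple()))  # e.g. 1419899752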
@@ -7,6 +7,10 @@ import requests
 import cachetools

 from jinja2 import FileSystemLoader, Environment
+from trollius import coroutine, From, Return, get_event_loop
+from functools import partial
+
+from buildman.asyncutil import AsyncWrapper


 logger = logging.getLogger(__name__)
@@ -32,12 +36,14 @@ class BuilderExecutor(object):
   """ Interface which can be plugged into the EphemeralNodeManager to provide a strategy for
       starting and stopping builders.
   """
+  @coroutine
   def start_builder(self, realm, token):
     """ Create a builder with the specified config. Returns a unique id which can be used to manage
         the builder.
     """
     raise NotImplementedError

+  @coroutine
   def stop_builder(self, builder_id):
     """ Stop a builder which is currently running.
     """
@@ -74,14 +80,18 @@ class EC2Executor(BuilderExecutor):
   """
   COREOS_STACK_URL = 'http://%s.release.core-os.net/amd64-usr/current/coreos_production_ami_hvm.txt'

+  def __init__(self, *args, **kwargs):
+    self._loop = get_event_loop()
+    super(EC2Executor, self).__init__(*args, **kwargs)
+
   def _get_conn(self):
     """ Creates an ec2 connection which can be used to manage instances.
     """
-    return boto.ec2.connect_to_region(
+    return AsyncWrapper(boto.ec2.connect_to_region(
       self.executor_config['EC2_REGION'],
       aws_access_key_id=self.executor_config['AWS_ACCESS_KEY'],
       aws_secret_access_key=self.executor_config['AWS_SECRET_KEY'],
-    )
+    ))

   @classmethod
   @cachetools.ttl_cache(ttl=ONE_HOUR)
@@ -92,25 +102,24 @@ class EC2Executor(BuilderExecutor):
     stack_amis = dict([stack.split('=') for stack in stack_list_string.split('|')])
     return stack_amis[ec2_region]

+  @coroutine
   def start_builder(self, realm, token):
     region = self.executor_config['EC2_REGION']
     channel = self.executor_config.get('COREOS_CHANNEL', 'stable')
-    coreos_ami = self._get_coreos_ami(region, channel)
+    get_ami_callable = partial(self._get_coreos_ami, region, channel)
+    coreos_ami = yield From(self._loop.run_in_executor(None, get_ami_callable))
     user_data = self.generate_cloud_config(realm, token, channel, self.manager_public_ip)

     logger.debug('Generated cloud config: %s', user_data)

     ec2_conn = self._get_conn()
-    # class FakeReservation(object):
-    #   def __init__(self):
-    #     self.instances = None
-    # reservation = FakeReservation()
-    reservation = ec2_conn.run_instances(
+    reservation = yield ec2_conn.run_instances(
       coreos_ami,
       instance_type=self.executor_config['EC2_INSTANCE_TYPE'],
       security_groups=self.executor_config['EC2_SECURITY_GROUP_IDS'],
       key_name=self.executor_config.get('EC2_KEY_NAME', None),
       user_data=user_data,
       instance_initiated_shutdown_behavior='terminate',
     )

     if not reservation.instances:
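_get_coreos_ami is a blocking, cachetools-cached classmethod, so it cannot be yielded directly; the diff wraps it in functools.partial and hands it to the loop's default thread-pool executor. The same pattern in isolation, with fetch_ami as a stand-in for the blocking call:

# Stand-alone version of the run_in_executor pattern used above.
from functools import partial
from trollius import coroutine, From, Return, get_event_loop

def fetch_ami(region, channel):
  return 'ami-0123456'  # stand-in for the blocking lookup

@coroutine
def resolve_ami(region, channel):
  loop = get_event_loop()
  # None selects the loop's default thread-pool executor.
  ami = yield From(loop.run_in_executor(None, partial(fetch_ami, region, channel)))
  raise Return(ami)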
@@ -124,12 +133,13 @@ class EC2Executor(BuilderExecutor):
       'Realm': realm,
       'Token': token,
     })
-    return launched.id
+    raise Return(launched.id)

+  @coroutine
   def stop_builder(self, builder_id):
     ec2_conn = self._get_conn()
-    stopped_instance_ids = [si.id for si in ec2_conn.stop_instances([builder_id], force=True)]
-    if builder_id not in stopped_instance_ids:
+    stopped_instances = yield ec2_conn.stop_instances([builder_id], force=True)
+    if builder_id not in [si.id for si in stopped_instances]:
       raise ExecutorException('Unable to stop instance: %s' % builder_id)

 class PopenExecutor(BuilderExecutor):
@@ -142,6 +152,7 @@ class PopenExecutor(BuilderExecutor):
   """ Executor which uses Popen to fork a quay-builder process.
   """
+  @coroutine
   def start_builder(self, realm, token):
     # Now start a machine for this job, adding the machine id to the etcd information
     logger.debug('Forking process for build')
@@ -162,9 +173,9 @@ class PopenExecutor(BuilderExecutor):
     builder_id = str(uuid.uuid4())
     self._jobs[builder_id] = (spawned, logpipe)
     logger.debug('Builder spawned with id: %s', builder_id)
-    return builder_id
+    raise Return(builder_id)

+  @coroutine
   def stop_builder(self, builder_id):
     if builder_id not in self._jobs:
       raise ExecutorException('Builder id not being tracked by executor.')
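The commit message also mentions a test for the ephemeral build manager; the test file is among the 11 changed files but is not reproduced in this excerpt. The class-level _etcd_client_klass hook added above is the natural injection point for a fake etcd client. A hypothetical sketch (module path, names, and test body are illustrative):

# Hypothetical test sketch; assumes the manager lives in
# buildman.manager.ephemeral and that the mock library is available.
from mock import Mock

from buildman.manager.ephemeral import EphemeralBuilderManager  # path assumed

def test_etcd_job_key_uses_build_uuid():
  # Swap the class-level hook so no test ever dials a real etcd server.
  EphemeralBuilderManager._etcd_client_klass = Mock()
  build_job = Mock()
  build_job.job_details = {'build_uuid': 'some-uuid'}
  job_key = EphemeralBuilderManager._etcd_job_key(build_job)
  assert job_key.endswith('some-uuid')

  # Coroutine methods would be driven the same way production code runs
  # them, e.g.:
  #   loop = trollius.get_event_loop()
  #   result = loop.run_until_complete(manager.schedule(build_job, loop))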