Generalize the ephemeral build managers so that any manager may manage a builder spawned by any other manager.
parent ccb19571d6
commit cc70225043

11 changed files with 258 additions and 125 deletions
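Before this commit, the realm/token pair for a spawned builder was known only to the manager that created it, so only that manager could accept the builder's connection. The diff below moves the realm spec into etcd under a `realm/` prefix: every manager watches that prefix and registers a matching component, whichever manager the builder actually reaches takes the job, and the others tear down their bookkeeping when the realm key is deleted. The following standalone sketch illustrates that broadcast-and-claim pattern; `FakeEtcd` and `SketchManager` are hypothetical names, not code from this repository.

```python
import json


class FakeEtcd(object):
  """ Stands in for the shared etcd cluster: every watcher sees every change. """
  def __init__(self):
    self.watchers = []

  def watch(self, callback):
    self.watchers.append(callback)

  def set(self, key, value):
    for callback in self.watchers:
      callback('set', key, value)

  def delete(self, key, value):
    # Passing the old value mirrors etcd handing watchers the previous node.
    for callback in self.watchers:
      callback('delete', key, value)


class SketchManager(object):
  """ Registers a component for every announced realm; cleans up on delete. """
  def __init__(self, name, etcd_client):
    self.name = name
    self.realms = {}
    etcd_client.watch(self.handle_realm_change)

  def handle_realm_change(self, action, key, value):
    realm_spec = json.loads(value)
    if action == 'set':
      # Any manager may now accept the builder that connects on this realm.
      self.realms[realm_spec['realm']] = realm_spec['token']
      print('%s listening on realm %s' % (self.name, realm_spec['realm']))
    elif action == 'delete':
      # Another manager took the connection; drop the local bookkeeping.
      self.realms.pop(realm_spec['realm'], None)
      print('%s unregistered realm %s' % (self.name, realm_spec['realm']))


etcd = FakeEtcd()
managers = [SketchManager('manager-a', etcd), SketchManager('manager-b', etcd)]

spec = json.dumps({'realm': '1234-realm', 'token': 'beef'})
etcd.set('realm/1234-realm', spec)     # both managers start listening
etcd.delete('realm/1234-realm', spec)  # the winning manager removes the key
```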
@@ -41,8 +41,11 @@ def run_build_manager():
   if manager_klass is None:
     return

-  public_ip = os.environ.get('PUBLIC_IP', '127.0.0.1')
-  logger.debug('Will pass public IP address %s to builders for websocket connection', public_ip)
+  manager_hostname = os.environ.get('BUILDMAN_HOSTNAME',
+                                    app.config.get('BUILDMAN_HOSTNAME',
+                                                   app.config['SERVER_HOSTNAME']))
+  logger.debug('Will pass buildman hostname %s to builders for websocket connection',
+               manager_hostname)

   logger.debug('Starting build manager with lifecycle "%s"', build_manager_config[0])
   ssl_context = None
@@ -53,7 +56,7 @@ def run_build_manager():
                                  os.path.join(os.environ.get('SSL_CONFIG'), 'ssl.key'))

   server = BuilderServer(app.config['SERVER_HOSTNAME'], dockerfile_build_queue, build_logs,
-                         user_files, manager_klass, build_manager_config[1], public_ip)
+                         user_files, manager_klass, build_manager_config[1], manager_hostname)
   server.run('0.0.0.0', ssl=ssl_context)

 if __name__ == '__main__':
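The entry point now resolves the hostname handed to builders through a layered fallback rather than a raw public IP: environment variable first, then the app config key, then the server's own hostname. A small hedged illustration of that resolution order (the `app_config` dict here is a stand-in for the real Flask config):

```python
import os

# Hypothetical stand-in for the application config.
app_config = {'SERVER_HOSTNAME': 'quay.example.com'}

manager_hostname = os.environ.get('BUILDMAN_HOSTNAME',
                                  app_config.get('BUILDMAN_HOSTNAME',
                                                 app_config['SERVER_HOSTNAME']))
print(manager_hostname)  # 'quay.example.com' unless BUILDMAN_HOSTNAME is set
```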
@@ -9,6 +9,7 @@ from autobahn.wamp.exception import ApplicationError

 from buildman.server import BuildJobResult
 from buildman.component.basecomponent import BaseComponent
+from buildman.jobutil.buildjob import BuildJobLoadException
 from buildman.jobutil.buildpack import BuildPackage, BuildPackageException
 from buildman.jobutil.buildstatus import StatusHandler
 from buildman.jobutil.workererror import WorkerError
@@ -58,19 +59,20 @@ class BuildComponent(BaseComponent):
     yield trollius.From(self.subscribe(self._on_heartbeat, 'io.quay.builder.heartbeat'))
     yield trollius.From(self.subscribe(self._on_log_message, 'io.quay.builder.logmessage'))

-    self._set_status(ComponentStatus.WAITING)
+    yield trollius.From(self._set_status(ComponentStatus.WAITING))

   def is_ready(self):
     """ Determines whether a build component is ready to begin a build. """
     return self._component_status == ComponentStatus.RUNNING

+  @trollius.coroutine
   def start_build(self, build_job):
     """ Starts a build. """
     self._current_job = build_job
     self._build_status = StatusHandler(self.build_logs, build_job.repo_build.uuid)
     self._image_info = {}

-    self._set_status(ComponentStatus.BUILDING)
+    yield trollius.From(self._set_status(ComponentStatus.BUILDING))

     # Retrieve the job's buildpack.
     buildpack_url = self.user_files.get_file_url(build_job.repo_build.resource_key,
@@ -82,23 +84,27 @@ class BuildComponent(BaseComponent):
       buildpack = BuildPackage.from_url(buildpack_url)
     except BuildPackageException as bpe:
       self._build_failure('Could not retrieve build package', bpe)
-      return
+      raise trollius.Return()

     # Extract the base image information from the Dockerfile.
     parsed_dockerfile = None
     logger.debug('Parsing dockerfile')

-    build_config = build_job.build_config
+    try:
+      build_config = build_job.build_config
+    except BuildJobLoadException as irbe:
+      self._build_failure('Could not load build job information', irbe)

     try:
       parsed_dockerfile = buildpack.parse_dockerfile(build_config.get('build_subdir'))
     except BuildPackageException as bpe:
       self._build_failure('Could not find Dockerfile in build package', bpe)
-      return
+      raise trollius.Return()

     image_and_tag_tuple = parsed_dockerfile.get_image_and_tag()
     if image_and_tag_tuple is None or image_and_tag_tuple[0] is None:
       self._build_failure('Missing FROM line in Dockerfile')
-      return
+      raise trollius.Return()

     base_image_information = {
       'repository': image_and_tag_tuple[0],
@@ -147,9 +153,7 @@ class BuildComponent(BaseComponent):
     logger.debug('Invoking build: %s', self.builder_realm)
     logger.debug('With Arguments: %s', build_arguments)

-    return (self
-            .call("io.quay.builder.build", **build_arguments)
-            .add_done_callback(self._build_complete))
+    self.call("io.quay.builder.build", **build_arguments).add_done_callback(self._build_complete)

   @staticmethod
   def _total_completion(statuses, total_images):
@@ -276,38 +280,42 @@ class BuildComponent(BaseComponent):
     self._current_job = None

     # Set the component back to a running state.
-    self._set_status(ComponentStatus.RUNNING)
+    yield trollius.From(self._set_status(ComponentStatus.RUNNING))

   @staticmethod
   def _ping():
     """ Ping pong. """
     return 'pong'

+  @trollius.coroutine
   def _on_ready(self, token, version):
     if not version in SUPPORTED_WORKER_VERSIONS:
-      logger.warning('Build component (token "%s") is running an out-of-date version: %s', version)
-      return False
+      logger.warning('Build component (token "%s") is running an out-of-date version: %s', token,
+                     version)
+      raise trollius.Return(False)

     if self._component_status != 'waiting':
       logger.warning('Build component (token "%s") is already connected', self.expected_token)
-      return False
+      raise trollius.Return(False)

     if token != self.expected_token:
-      logger.warning('Builder token mismatch. Expected: "%s". Found: "%s"', self.expected_token, token)
-      return False
+      logger.warning('Builder token mismatch. Expected: "%s". Found: "%s"', self.expected_token,
+                     token)
+      raise trollius.Return(False)

-    self._set_status(ComponentStatus.RUNNING)
+    yield trollius.From(self._set_status(ComponentStatus.RUNNING))

     # Start the heartbeat check and updating loop.
     loop = trollius.get_event_loop()
     loop.create_task(self._heartbeat())
     logger.debug('Build worker %s is connected and ready', self.builder_realm)
-    return True
+    raise trollius.Return(True)

+  @trollius.coroutine
   def _set_status(self, phase):
     if phase == ComponentStatus.RUNNING:
       loop = trollius.get_event_loop()
-      self.parent_manager.build_component_ready(self, loop)
+      yield trollius.From(self.parent_manager.build_component_ready(self, loop))

     self._component_status = phase

@@ -344,13 +352,14 @@ class BuildComponent(BaseComponent):
     logger.debug('Checking heartbeat on realm %s', self.builder_realm)
     if (self._last_heartbeat and
         self._last_heartbeat < datetime.datetime.utcnow() - HEARTBEAT_DELTA):
-      self._timeout()
+      yield trollius.From(self._timeout())
       return

     yield trollius.From(trollius.sleep(HEARTBEAT_TIMEOUT))

+  @trollius.coroutine
   def _timeout(self):
-    self._set_status(ComponentStatus.TIMED_OUT)
+    yield trollius.From(self._set_status(ComponentStatus.TIMED_OUT))
     logger.warning('Build component with realm %s has timed out', self.builder_realm)
     self._dispose(timed_out=True)
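Many of the changes above convert plain methods into trollius coroutines. Under Python 2 a generator body cannot use `return value`, so trollius coroutines signal their result with `raise trollius.Return(value)`, and callers unwrap it with `yield trollius.From(...)`; that is why each `return False` above becomes `raise trollius.Return(False)`. A minimal sketch of the pattern, assuming the trollius package is installed:

```python
import trollius


@trollius.coroutine
def is_ready(status):
  # Pretend to await something asynchronous before answering.
  yield trollius.From(trollius.sleep(0))
  # Python 2 generators cannot `return value`, so trollius raises instead.
  raise trollius.Return(status == 'running')


loop = trollius.get_event_loop()
print(loop.run_until_complete(is_ready('running')))  # True
```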
@@ -1,6 +1,9 @@
+import json
+
+from cachetools import lru_cache
+
 from data import model

-import json

 class BuildJobLoadException(Exception):
   """ Exception raised if a build job could not be instantiated for some reason. """
@@ -18,14 +21,22 @@ class BuildJob(object):
         'Could not parse build queue item config with ID %s' % self.job_details['build_uuid']
       )

+  @lru_cache(maxsize=1)
+  def _load_repo_build(self):
     try:
-      self.repo_build = model.get_repository_build(self.job_details['build_uuid'])
+      return model.get_repository_build(self.job_details['build_uuid'])
     except model.InvalidRepositoryBuildException:
       raise BuildJobLoadException(
         'Could not load repository build with ID %s' % self.job_details['build_uuid'])

+  @property
+  def repo_build(self):
+    return self._load_repo_build()
+
+  @property
+  def build_config(self):
     try:
-      self.build_config = json.loads(self.repo_build.job_config)
+      return json.loads(self.repo_build.job_config)
     except ValueError:
       raise BuildJobLoadException(
         'Could not parse repository build job config with ID %s' % self.job_details['build_uuid']
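The `BuildJob` change replaces eagerly loaded attributes with lazily computed, memoized properties: `@property` paired with cachetools' `lru_cache` means the repository build is fetched from the database at most once per job, and only if something actually asks for it. A rough stdlib analogue of the pattern, using `functools.lru_cache` (which, unlike some cachetools variants, keys the cache on `self`):

```python
import functools


class Record(object):
  def __init__(self, key):
    self.key = key

  @functools.lru_cache(maxsize=1)
  def _load(self):
    print('expensive lookup for %s' % self.key)  # runs only once
    return {'key': self.key}

  @property
  def data(self):
    return self._load()


record = Record('deadbeef')
print(record.data)  # triggers the expensive lookup
print(record.data)  # served from the cache; no second lookup is printed
```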
@@ -3,12 +3,12 @@ from trollius import coroutine
 class BaseManager(object):
   """ Base for all worker managers. """
   def __init__(self, register_component, unregister_component, job_heartbeat_callback,
-               job_complete_callback, public_ip_address, heartbeat_period_sec):
+               job_complete_callback, manager_hostname, heartbeat_period_sec):
     self.register_component = register_component
     self.unregister_component = unregister_component
     self.job_heartbeat_callback = job_heartbeat_callback
     self.job_complete_callback = job_complete_callback
-    self.public_ip_address = public_ip_address
+    self.manager_hostname = manager_hostname
     self.heartbeat_period_sec = heartbeat_period_sec

   @coroutine
@@ -31,7 +31,7 @@ class BaseManager(object):
     raise NotImplementedError

   @coroutine
-  def schedule(self, build_job, loop):
+  def schedule(self, build_job):
     """ Schedules a queue item to be built. Returns True if the item was properly scheduled
         and False if all workers are busy.
     """
@@ -42,7 +42,8 @@ class BaseManager(object):
     """
     raise NotImplementedError

-  def build_component_ready(self, build_component, loop):
+  @coroutine
+  def build_component_ready(self, build_component):
     """ Method invoked whenever a build component announces itself as ready.
     """
     raise NotImplementedError
@@ -5,7 +5,7 @@ from buildman.component.basecomponent import BaseComponent
 from buildman.component.buildcomponent import BuildComponent
 from buildman.manager.basemanager import BaseManager

-from trollius.coroutines import From, Return, coroutine
+from trollius import From, Return, coroutine, async

 REGISTRATION_REALM = 'registration'
 logger = logging.getLogger(__name__)
@@ -51,16 +51,19 @@ class EnterpriseManager(BaseManager):
     return realm

   @coroutine
-  def schedule(self, build_job, loop):
+  def schedule(self, build_job):
     """ Schedules a build for an Enterprise Registry. """
     if self.shutting_down or not self.ready_components:
       raise Return(False)

     component = self.ready_components.pop()
-    loop.call_soon(component.start_build, build_job)
+    yield From(component.start_build(build_job))
+
     raise Return(True)

-  def build_component_ready(self, build_component, loop):
+  @coroutine
+  def build_component_ready(self, build_component):
     self.ready_components.add(build_component)

   def shutdown(self):
@@ -13,16 +13,28 @@ from urllib3.exceptions import ReadTimeoutError
 from buildman.manager.basemanager import BaseManager
 from buildman.manager.executor import PopenExecutor, EC2Executor
 from buildman.component.buildcomponent import BuildComponent
+from buildman.jobutil.buildjob import BuildJob
 from buildman.asyncutil import AsyncWrapper
+from util.morecollections import AttrDict


 logger = logging.getLogger(__name__)


 ETCD_BUILDER_PREFIX = 'building/'
-ETCD_EXPIRE_RESULT = 'expire'
+ETCD_REALM_PREFIX = 'realm/'
 ETCD_DISABLE_TIMEOUT = 0

+
+class EtcdAction(object):
+  GET = 'get'
+  SET = 'set'
+  EXPIRE = 'expire'
+  UPDATE = 'update'
+  DELETE = 'delete'
+  CREATE = 'create'
+  COMPARE_AND_SWAP = 'compareAndSwap'
+  COMPARE_AND_DELETE = 'compareAndDelete'
+

 class EphemeralBuilderManager(BaseManager):
   """ Build manager implementation for the Enterprise Registry. """
@@ -41,52 +53,82 @@ class EphemeralBuilderManager(BaseManager):
     self._etcd_client = None

     self._component_to_job = {}
+    self._job_uuid_to_component = {}
     self._component_to_builder = {}

     self._executor = None

-    self._worker_watch_task = None
+    # Map of etcd keys being watched to the tasks watching them
+    self._watch_tasks = {}

     super(EphemeralBuilderManager, self).__init__(*args, **kwargs)

-  def _watch_builders(self):
-    """ Watch the builders key for expirations.
-    """
+  def _watch_etcd(self, etcd_key, change_callback, recursive=True):
+    watch_task_key = (etcd_key, recursive)
+    def callback_wrapper(changed_key_future):
+      if watch_task_key not in self._watch_tasks or self._watch_tasks[watch_task_key].done():
+        self._watch_etcd(etcd_key, change_callback)
+
+      if changed_key_future.cancelled():
+        # Due to lack of interest, tomorrow has been cancelled
+        return
+
+      try:
+        etcd_result = changed_key_future.result()
+      except ReadTimeoutError:
+        return
+
+      change_callback(etcd_result)
+
     if not self._shutting_down:
-      workers_future = self._etcd_client.watch(ETCD_BUILDER_PREFIX, recursive=True,
-                                               timeout=ETCD_DISABLE_TIMEOUT)
-      workers_future.add_done_callback(self._handle_key_expiration)
-      logger.debug('Scheduling watch task.')
-      self._worker_watch_task = async(workers_future)
+      watch_future = self._etcd_client.watch(etcd_key, recursive=recursive,
+                                             timeout=ETCD_DISABLE_TIMEOUT)
+      watch_future.add_done_callback(callback_wrapper)
+      logger.debug('Scheduling watch of key: %s%s', etcd_key, '/*' if recursive else '')
+      self._watch_tasks[watch_task_key] = async(watch_future)

-  def _handle_key_expiration(self, changed_key_future):
-    """ Handle when a builder expires
-    """
-    if self._worker_watch_task is None or self._worker_watch_task.done():
-      self._watch_builders()
-
-    if changed_key_future.cancelled():
-      # Due to lack of interest, tomorrow has been cancelled
-      return
-
-    try:
-      etcd_result = changed_key_future.result()
-    except ReadTimeoutError:
-      return
-
-    if etcd_result.action == ETCD_EXPIRE_RESULT:
+  def _handle_builder_expiration(self, etcd_result):
+    if etcd_result.action == EtcdAction.EXPIRE:
       # Handle the expiration
       logger.debug('Builder expired, clean up the old build node')
       job_metadata = json.loads(etcd_result._prev_node.value)
       async(self._clean_up_old_builder(etcd_result.key, job_metadata))

+  def _handle_realm_change(self, etcd_result):
+    if etcd_result.action == EtcdAction.SET:
+      # We must listen on the realm created by ourselves or another worker
+      realm_spec = json.loads(etcd_result.value)
+      component = self.register_component(realm_spec['realm'], BuildComponent,
+                                          token=realm_spec['token'])
+      build_job = BuildJob(AttrDict(realm_spec['job_queue_item']))
+      self._component_to_job[component] = build_job
+      self._component_to_builder[component] = realm_spec['builder_id']
+      self._job_uuid_to_component[build_job.job_details['build_uuid']] = component
+
+    elif etcd_result.action == EtcdAction.DELETE or etcd_result.action == EtcdAction.EXPIRE:
+      # We must stop listening for new connections on the specified realm, if we did not get the
+      # connection
+      realm_spec = json.loads(etcd_result._prev_node.value)
+      build_job = BuildJob(AttrDict(realm_spec['job_queue_item']))
+      component = self._job_uuid_to_component.pop(build_job.job_details['build_uuid'], None)
+      if component is not None:
+        # We were not the manager which the worker connected to, remove the bookkeeping for it
+        logger.debug('Unregistering unused component on realm: %s', realm_spec['realm'])
+        del self._component_to_job[component]
+        del self._component_to_builder[component]
+        self.unregister_component(component)
+
+    else:
+      logger.warning('Unexpected action (%s) on realm key: %s', etcd_result.action, etcd_result.key)
+
   def initialize(self, manager_config):
     logger.debug('Calling initialize')
     self._manager_config = manager_config

     executor_klass = self._executors.get(manager_config.get('EXECUTOR', ''), PopenExecutor)
     self._executor = executor_klass(manager_config.get('EXECUTOR_CONFIG', {}),
-                                    self.public_ip_address)
+                                    self.manager_hostname)

     etcd_host = self._manager_config.get('ETCD_HOST', '127.0.0.1')
     etcd_port = self._manager_config.get('ETCD_PORT', 2379)
@@ -97,7 +139,8 @@ class EphemeralBuilderManager(BaseManager):
     self._etcd_client = AsyncWrapper(self._etcd_client_klass(host=etcd_host, port=etcd_port),
                                      executor=self._async_thread_executor)

-    self._watch_builders()
+    self._watch_etcd(ETCD_BUILDER_PREFIX, self._handle_builder_expiration)
+    self._watch_etcd(ETCD_REALM_PREFIX, self._handle_realm_change)

   def setup_time(self):
     setup_time = self._manager_config.get('MACHINE_SETUP_TIME', 300)
@@ -108,17 +151,17 @@ class EphemeralBuilderManager(BaseManager):
     logger.debug('Shutting down worker.')
     self._shutting_down = True

-    if self._worker_watch_task is not None:
-      logger.debug('Canceling watch task.')
-      self._worker_watch_task.cancel()
-      self._worker_watch_task = None
+    for (etcd_key, _), task in self._watch_tasks.items():
+      if not task.done():
+        logger.debug('Canceling watch task for %s', etcd_key)
+        task.cancel()

     if self._async_thread_executor is not None:
       logger.debug('Shutting down thread pool executor.')
       self._async_thread_executor.shutdown()

   @coroutine
-  def schedule(self, build_job, loop):
+  def schedule(self, build_job):
     logger.debug('Calling schedule with job: %s', build_job.job_details['build_uuid'])

     # Check if there are worker slots avialable by checking the number of jobs in etcd
@@ -154,8 +197,6 @@ class EphemeralBuilderManager(BaseManager):

     try:
       yield From(self._etcd_client.write(job_key, json.dumps(payload), prevExist=False, ttl=ttl))
-      component = self.register_component(realm, BuildComponent, token=token)
-      self._component_to_job[component] = build_job
     except KeyError:
       # The job was already taken by someone else, we are probably a retry
       logger.error('Job already exists in etcd, are timeouts misconfigured or is the queue broken?')
@@ -163,20 +204,38 @@ class EphemeralBuilderManager(BaseManager):

     logger.debug('Starting builder with executor: %s', self._executor)
     builder_id = yield From(self._executor.start_builder(realm, token))
-    self._component_to_builder[component] = builder_id

     # Store the builder in etcd associated with the job id
     payload['builder_id'] = builder_id
     yield From(self._etcd_client.write(job_key, json.dumps(payload), prevExist=True, ttl=ttl))

+    # Store the realm spec which will allow any manager to accept this builder when it connects
+    realm_spec = json.dumps({
+      'realm': realm,
+      'token': token,
+      'builder_id': builder_id,
+      'job_queue_item': build_job.job_item,
+    })
+    try:
+      yield From(self._etcd_client.write(self._etcd_realm_key(realm), realm_spec, prevExist=False,
+                                         ttl=ttl))
+    except KeyError:
+      logger.error('Realm already exists in etcd. UUID collision or something is very very wrong.')
+      raise Return(False)
+
     raise Return(True)

-  def build_component_ready(self, build_component, loop):
+  @coroutine
+  def build_component_ready(self, build_component):
     try:
+      # Clean up the bookkeeping for allowing any manager to take the job
       job = self._component_to_job.pop(build_component)
+      del self._job_uuid_to_component[job.job_details['build_uuid']]
+      yield From(self._etcd_client.delete(self._etcd_realm_key(build_component.builder_realm)))
+
       logger.debug('Sending build %s to newly ready component on realm %s',
                    job.job_details['build_uuid'], build_component.builder_realm)
-      loop.call_soon(build_component.start_build, job)
+      yield From(build_component.start_build(job))
     except KeyError:
       logger.warning('Builder is asking for more work, but work already completed')

@@ -240,6 +299,12 @@ class EphemeralBuilderManager(BaseManager):
     """
     return os.path.join(ETCD_BUILDER_PREFIX, build_job.job_details['build_uuid'])

+  @staticmethod
+  def _etcd_realm_key(realm):
+    """ Create a key which is used to track an incoming connection on a realm.
+    """
+    return os.path.join(ETCD_REALM_PREFIX, realm)
+
   def num_workers(self):
     """ Return the number of workers we're managing locally.
     """
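`_watch_etcd` above generalizes the old single-purpose builder watch: each watched prefix gets a completion callback that first re-arms the watch (etcd v2 watches are one-shot) and then dispatches the result to a prefix-specific handler. A stripped-down sketch of that re-arming callback pattern, using plain `concurrent.futures` in place of etcd and trollius; all names here are illustrative, not from the repository:

```python
from concurrent.futures import Future


class FakeKeySource(object):
  """ Hands out one-shot futures, like an etcd v2 watch. """
  def __init__(self):
    self.futures = {}

  def next_change(self, key):
    future = Future()
    self.futures[key] = future
    return future

  def fire(self, key, value):
    self.futures[key].set_result(value)


class Watcher(object):
  def __init__(self, source):
    self.source = source
    self.pending = {}

  def watch(self, key, change_callback):
    def callback_wrapper(done_future):
      # Re-arm first, so the next change is never dropped between the
      # result arriving and the handler running.
      if key not in self.pending or self.pending[key].done():
        self.watch(key, change_callback)
      change_callback(done_future.result())

    future = self.source.next_change(key)
    future.add_done_callback(callback_wrapper)
    self.pending[key] = future


source = FakeKeySource()
watcher = Watcher(source)
watcher.watch('realm/', lambda value: print('saw %s' % value))
source.fire('realm/', 'first')   # handled, and the watch re-arms itself
source.fire('realm/', 'second')  # still handled thanks to the re-arm
```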
@@ -29,9 +29,9 @@ class ExecutorException(Exception):


 class BuilderExecutor(object):
-  def __init__(self, executor_config, manager_public_ip):
+  def __init__(self, executor_config, manager_hostname):
     self.executor_config = executor_config
-    self.manager_public_ip = manager_public_ip
+    self.manager_hostname = manager_hostname

   """ Interface which can be plugged into the EphemeralNodeManager to provide a strategy for
       starting and stopping builders.
@@ -52,7 +52,7 @@ class BuilderExecutor(object):
   def get_manager_websocket_url(self):
     return 'ws://{0}:'

-  def generate_cloud_config(self, realm, token, coreos_channel, manager_ip,
+  def generate_cloud_config(self, realm, token, coreos_channel, manager_hostname,
                             quay_username=None, quay_password=None, etcd_token=None):
     if quay_username is None:
       quay_username = self.executor_config['QUAY_USERNAME']
@@ -69,7 +69,7 @@ class BuilderExecutor(object):
       quay_username=quay_username,
       quay_password=quay_password,
       etcd_token=etcd_token,
-      manager_ip=manager_ip,
+      manager_hostname=manager_hostname,
       coreos_channel=coreos_channel,
     )

@@ -108,7 +108,7 @@ class EC2Executor(BuilderExecutor):
     channel = self.executor_config.get('COREOS_CHANNEL', 'stable')
     get_ami_callable = partial(self._get_coreos_ami, region, channel)
     coreos_ami = yield From(self._loop.run_in_executor(None, get_ami_callable))
-    user_data = self.generate_cloud_config(realm, token, channel, self.manager_public_ip)
+    user_data = self.generate_cloud_config(realm, token, channel, self.manager_hostname)

     logger.debug('Generated cloud config: %s', user_data)

@@ -155,10 +155,10 @@ class EC2Executor(BuilderExecutor):
 class PopenExecutor(BuilderExecutor):
   """ Implementation of BuilderExecutor which uses Popen to fork a quay-builder process.
   """
-  def __init__(self, executor_config, manager_public_ip):
+  def __init__(self, executor_config, manager_hostname):
     self._jobs = {}

-    super(PopenExecutor, self).__init__(executor_config, manager_public_ip)
+    super(PopenExecutor, self).__init__(executor_config, manager_hostname)

   """ Executor which uses Popen to fork a quay-builder process.
   """
@@ -37,7 +37,7 @@ class BuilderServer(object):
       controller.
   """
   def __init__(self, registry_hostname, queue, build_logs, user_files, lifecycle_manager_klass,
-               lifecycle_manager_config, manager_public_ip):
+               lifecycle_manager_config, manager_hostname):
     self._loop = None
     self._current_status = 'starting'
     self._current_components = []
@@ -53,7 +53,7 @@ class BuilderServer(object):
       self._unregister_component,
       self._job_heartbeat,
       self._job_complete,
-      manager_public_ip,
+      manager_hostname,
       HEARTBEAT_PERIOD_SEC,
     )
     self._lifecycle_manager_config = lifecycle_manager_config
@@ -158,7 +158,7 @@ class BuilderServer(object):
         self._queue.incomplete(job_item, restore_retry=False)

       logger.debug('Build job found. Checking for an avaliable worker.')
-      scheduled = yield From(self._lifecycle_manager.schedule(build_job, self._loop))
+      scheduled = yield From(self._lifecycle_manager.schedule(build_job))
       if scheduled:
         self._job_count = self._job_count + 1
         logger.debug('Build job scheduled. Running: %s', self._job_count)
@@ -168,7 +168,6 @@ class BuilderServer(object):

       yield From(trollius.sleep(WORK_CHECK_TIMEOUT))

-
   @trollius.coroutine
   def _initialize(self, loop, host, ssl=None):
     self._loop = loop
@@ -6,7 +6,7 @@ write_files:
   content: |
     REALM={{ realm }}
     TOKEN={{ token }}
-    ENDPOINT=wss://buildman.quay.io:8787
+    SERVER=wss://{{ manager_hostname }}

 coreos:
   update:
@@ -31,7 +31,6 @@ coreos:
       [Service]
       TimeoutStartSec=600
       TimeoutStopSec=2000
-      ExecStartPre=/bin/sh -xc "echo '{{ manager_ip }} buildman.quay.io' >> /etc/hosts; exit 0"
      ExecStartPre=/usr/bin/docker login -u {{ quay_username }} -p {{ quay_password }} -e unused quay.io
      ExecStart=/usr/bin/docker run --rm --net=host --name quay-builder --privileged --env-file /root/overrides.list -v /var/run/docker.sock:/var/run/docker.sock quay.io/coreos/registry-build-worker:latest
      ExecStop=/usr/bin/docker stop quay-builder
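With the hard-coded `buildman.quay.io` endpoint (and the `/etc/hosts` override that made it resolve) gone, the builder's websocket endpoint is injected through the `manager_hostname` template variable, so a builder can call back to whichever manager spawned it. The `{{ }}` placeholders suggest a Jinja-style engine; the following is a hypothetical rendering of that snippet, not code taken from this repository:

```python
from jinja2 import Template

snippet = Template(
  'REALM={{ realm }}\n'
  'TOKEN={{ token }}\n'
  'SERVER=wss://{{ manager_hostname }}\n')

print(snippet.render(realm='1234-realm', token='beef',
                     manager_hostname='buildman.example.com'))
```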
@@ -78,7 +78,8 @@ class WorkQueue(object):
   def get(self, processing_time=300):
     """
     Get an available item and mark it as unavailable for the default of five
-    minutes.
+    minutes. The result of this method must always be composed of simple
+    python objects which are JSON serializable for network portability reasons.
     """
     now = datetime.utcnow()
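The new docstring requirement exists because the queue item is now embedded in the realm spec and round-tripped through etcd as JSON, so any value that cannot survive `json.dumps`/`json.loads` unchanged would break the handoff between managers. A one-line illustration of the invariant:

```python
import json

# The job queue item must survive a JSON round trip unchanged (illustrative).
job_item = {'id': 1, 'body': json.dumps({'build_uuid': 'deadbeef'})}
assert json.loads(json.dumps(job_item)) == job_item
```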
@@ -4,19 +4,20 @@ import os.path
 import time
 import json

-from trollius import coroutine, get_event_loop, From, Future, sleep
+from trollius import coroutine, get_event_loop, From, Future, sleep, Return
 from mock import Mock
 from threading import Event
 from urllib3.exceptions import ReadTimeoutError

 from buildman.manager.executor import BuilderExecutor
 from buildman.manager.ephemeral import (EphemeralBuilderManager, ETCD_BUILDER_PREFIX,
-                                        ETCD_EXPIRE_RESULT)
+                                        ETCD_REALM_PREFIX, EtcdAction)
 from buildman.server import BuildJobResult
 from buildman.component.buildcomponent import BuildComponent


 BUILD_UUID = 'deadbeef-dead-beef-dead-deadbeefdead'
+REALM_ID = '1234-realm'


 def async_test(f):
@@ -43,17 +44,17 @@ class TestEphemeral(unittest.TestCase):
     self.etcd_client_mock.watch = Mock(side_effect=hang_until_event)
     return self.etcd_client_mock

-  def _create_mock_executor(self, *args, **kwargs):
-    def create_completed_future(result=None):
-      def inner(*args, **kwargs):
-        new_future = Future()
-        new_future.set_result(result)
-        return new_future
-      return inner
+  def _create_completed_future(self, result=None):
+    def inner(*args, **kwargs):
+      new_future = Future()
+      new_future.set_result(result)
+      return new_future
+    return inner

+  def _create_mock_executor(self, *args, **kwargs):
     self.test_executor = Mock(spec=BuilderExecutor)
-    self.test_executor.start_builder = Mock(side_effect=create_completed_future('123'))
-    self.test_executor.stop_builder = Mock(side_effect=create_completed_future())
+    self.test_executor.start_builder = Mock(side_effect=self._create_completed_future('123'))
+    self.test_executor.stop_builder = Mock(side_effect=self._create_completed_future())
     return self.test_executor

   def _create_build_job(self):
@@ -61,6 +62,10 @@ class TestEphemeral(unittest.TestCase):
     mock_job.job_details = {
       'build_uuid': BUILD_UUID,
     }
+    mock_job.job_item = {
+      'body': json.dumps(mock_job.job_details),
+      'id': 1,
+    }
     return mock_job

   def setUp(self):
@@ -71,13 +76,13 @@ class TestEphemeral(unittest.TestCase):
     self.etcd_wait_event.clear()

     self.register_component_callback = Mock()
-    self.uniregister_component_callback = Mock()
+    self.unregister_component_callback = Mock()
     self.job_heartbeat_callback = Mock()
     self.job_complete_callback = Mock()

     self.manager = EphemeralBuilderManager(
       self.register_component_callback,
-      self.uniregister_component_callback,
+      self.unregister_component_callback,
       self.job_heartbeat_callback,
       self.job_complete_callback,
       '127.0.0.1',
@@ -97,15 +102,19 @@ class TestEphemeral(unittest.TestCase):
     del EphemeralBuilderManager._executors['test']
     EphemeralBuilderManager._etcd_client_klass = self.old_etcd_client_klass

-  @async_test
-  def test_schedule_and_complete(self):
+  @coroutine
+  def _setup_job_for_managers(self):
+    # Test that we are watching the realm location before anything else happens
+    self.etcd_client_mock.watch.assert_any_call(ETCD_REALM_PREFIX, recursive=True, timeout=0)
+
     self.etcd_client_mock.read = Mock(side_effect=KeyError)
-    test_component = BuildComponent(None)
+    test_component = Mock(spec=BuildComponent)
+    test_component.builder_realm = REALM_ID
+    test_component.start_build = Mock(side_effect=self._create_completed_future())
     self.register_component_callback.return_value = test_component

     # Ask for a builder to be scheduled
-    loop = get_event_loop()
-    is_scheduled = yield From(self.manager.schedule(self.mock_job, loop))
+    is_scheduled = yield From(self.manager.schedule(self.mock_job))

     self.assertTrue(is_scheduled)

@@ -114,29 +123,76 @@ class TestEphemeral(unittest.TestCase):
     self.assertEqual(self.etcd_client_mock.write.call_args_list[0][0][0], self.mock_job_key)
     self.assertEqual(self.etcd_client_mock.write.call_args_list[1][0][0], self.mock_job_key)

+    # Right now the job is not registered with any managers because etcd has not accepted the job
+    self.assertEqual(self.register_component_callback.call_count, 0)
+
+    realm_created = Mock(spec=etcd.EtcdResult)
+    realm_created.action = EtcdAction.SET
+    realm_created.key = os.path.join(ETCD_REALM_PREFIX, REALM_ID)
+    realm_created.value = json.dumps({
+      'realm': REALM_ID,
+      'token': 'beef',
+      'builder_id': '123',
+      'job_queue_item': self.mock_job.job_item,
+    })
+
+    self.manager._handle_realm_change(realm_created)
+
     self.assertEqual(self.register_component_callback.call_count, 1)

+    raise Return(test_component)
+
+  @async_test
+  def test_schedule_and_complete(self):
+    # Test that a job is properly registered with all of the managers
+    test_component = yield From(self._setup_job_for_managers())
+
+    # Take the job ourselves
+    yield From(self.manager.build_component_ready(test_component))
+
+    self.etcd_client_mock.delete.assert_called_once_with(os.path.join(ETCD_REALM_PREFIX, REALM_ID))
+    self.etcd_client_mock.delete.reset_mock()
+
+    # Finish the job
     yield From(self.manager.job_completed(self.mock_job, BuildJobResult.COMPLETE, test_component))

     self.assertEqual(self.test_executor.stop_builder.call_count, 1)
     self.etcd_client_mock.delete.assert_called_once_with(self.mock_job_key)

+  @async_test
+  def test_another_manager_takes_job(self):
+    # Prepare a job to be taken by another manager
+    test_component = yield From(self._setup_job_for_managers())
+
+    realm_deleted = Mock(spec=etcd.EtcdResult)
+    realm_deleted.action = EtcdAction.DELETE
+    realm_deleted.key = os.path.join(ETCD_REALM_PREFIX, REALM_ID)
+
+    realm_deleted._prev_node = Mock(spec=etcd.EtcdResult)
+    realm_deleted._prev_node.value = json.dumps({
+      'realm': REALM_ID,
+      'token': 'beef',
+      'builder_id': '123',
+      'job_queue_item': self.mock_job.job_item,
+    })
+
+    self.manager._handle_realm_change(realm_deleted)
+
+    self.unregister_component_callback.assert_called_once_with(test_component)
+
   @async_test
   def test_expiring_worker(self):
     # Test that we are watching before anything else happens
-    self.etcd_client_mock.watch.assert_called_once_with(ETCD_BUILDER_PREFIX, recursive=True,
-                                                        timeout=0)
+    self.etcd_client_mock.watch.assert_any_call(ETCD_BUILDER_PREFIX, recursive=True, timeout=0)

     # Send a signal to the callback that a worker has expired
     expired_result = Mock(spec=etcd.EtcdResult)
-    expired_result.action = ETCD_EXPIRE_RESULT
+    expired_result.action = EtcdAction.EXPIRE
     expired_result.key = self.mock_job_key
     expired_result._prev_node = Mock(spec=etcd.EtcdResult)
     expired_result._prev_node.value = json.dumps({'builder_id': '1234'})
-    expired_future = Future()
-    expired_future.set_result(expired_result)

-    self.manager._handle_key_expiration(expired_future)
+    self.manager._handle_builder_expiration(expired_result)

     yield From(sleep(.01))

@@ -151,10 +207,8 @@ class TestEphemeral(unittest.TestCase):
     set_result = Mock(sepc=etcd.EtcdResult)
     set_result.action = 'set'
     set_result.key = self.mock_job_key
-    set_future = Future()
-    set_future.set_result(set_result)

-    self.manager._handle_key_expiration(set_future)
+    self.manager._handle_builder_expiration(set_result)

     yield From(sleep(.01))

@@ -179,15 +233,3 @@ class TestEphemeral(unittest.TestCase):
     self.job_heartbeat_callback.assert_called_once_with(self.mock_job)
     self.assertEqual(self.etcd_client_mock.write.call_count, 1)
     self.assertEqual(self.etcd_client_mock.write.call_args_list[0][0][0], self.mock_job_key)
-
-  @async_test
-  def test_etcd_read_timeout(self):
-    # Send a signal to the callback that a worker key has been changed
-    read_timeout_future = Future()
-    read_timeout_future.set_exception(ReadTimeoutError(None, None, None))
-
-    self.manager._handle_key_expiration(read_timeout_future)
-
-    yield From(sleep(.01))
-
-    self.assertEquals(self.test_executor.stop_builder.call_count, 0)