2014-11-11 23:23:15 +00:00
|
|
|
import logging
|
|
|
|
import trollius
|
|
|
|
|
|
|
|
from autobahn.asyncio.wamp import RouterFactory, RouterSessionFactory
|
2014-11-18 20:45:56 +00:00
|
|
|
from autobahn.asyncio.websocket import WampWebSocketServerFactory
|
2014-11-11 23:23:15 +00:00
|
|
|
from autobahn.wamp import types
|
|
|
|
|
|
|
|
from aiowsgi import create_server as create_wsgi_server
|
|
|
|
from flask import Flask
|
2014-11-18 20:45:56 +00:00
|
|
|
from threading import Event
|
2014-11-11 23:23:15 +00:00
|
|
|
from trollius.coroutines import From
|
2014-12-16 18:37:40 +00:00
|
|
|
from datetime import timedelta
|
2014-11-11 23:23:15 +00:00
|
|
|
|
2014-11-25 21:14:44 +00:00
|
|
|
from buildman.jobutil.buildjob import BuildJob, BuildJobLoadException
|
2014-11-21 19:27:06 +00:00
|
|
|
from data.queue import WorkQueue
|
2014-11-12 19:03:07 +00:00
|
|
|
|
2014-11-30 22:48:02 +00:00
|
|
|
logger = logging.getLogger(__name__)

# --- Work queue polling and job timing ---

# Seconds the work checker coroutine sleeps between polls of the build queue.
WORK_CHECK_TIMEOUT = 10

# NOTE(review): not referenced by the code visible alongside these constants; confirm it is
# still used elsewhere before relying on it.
TIMEOUT_PERIOD_MINUTES = 20

# Passed as `seconds_from_now` when a job heartbeat extends the processing time of a queue item.
JOB_TIMEOUT_SECONDS = 300

# Passed as `minimum_extension` alongside JOB_TIMEOUT_SECONDS on every job heartbeat.
MINIMUM_JOB_EXTENSION = timedelta(minutes=2)

# --- Network ports ---

# Port on which the WAMP websocket server listens.
WEBSOCKET_PORT = 8787

# Port on which the HTTP controller (status) application listens.
CONTROLLER_PORT = 8686

# --- Lifecycle manager ---

# Handed to the lifecycle manager when it is constructed.
HEARTBEAT_PERIOD_SEC = 30
|
|
|
|
|
2014-11-18 20:45:56 +00:00
|
|
|
class BuildJobResult(object):
    """ Enumeration of the outcomes a build job can be reported with.

        The members are plain strings, so callers compare them with `==`.
    """
    # The job did not finish.
    INCOMPLETE = 'incomplete'

    # The job finished.
    COMPLETE = 'complete'

    # The job failed with an error.
    ERROR = 'error'
|
|
|
|
|
2014-11-11 23:23:15 +00:00
|
|
|
class BuilderServer(object):
    """ Server which handles both HTTP and WAMP requests, managing the full state of the build
        controller.

        A Flask application reports the current status over HTTP on CONTROLLER_PORT, a WAMP
        websocket server listens on WEBSOCKET_PORT, and a coroutine polls the work queue and
        hands build jobs to the lifecycle manager.
    """

    def __init__(self, registry_hostname, queue, build_logs, user_files, lifecycle_manager_klass,
                 lifecycle_manager_config, manager_public_ip):
        """ Stores the collaborators, instantiates the lifecycle manager and builds the
            controller application. The event loop and servers are only set up by run().

            lifecycle_manager_klass is called positionally with: the component register and
            unregister callbacks, the job heartbeat callback, the job completion callback,
            manager_public_ip and HEARTBEAT_PERIOD_SEC. lifecycle_manager_config is kept and
            handed to the manager's initialize() when run() is called.
        """
        self._loop = None
        self._current_status = 'starting'
        self._current_components = []
        self._job_count = 0

        self._session_factory = RouterSessionFactory(RouterFactory())
        self._registry_hostname = registry_hostname
        self._queue = queue
        self._build_logs = build_logs
        self._user_files = user_files
        self._lifecycle_manager = lifecycle_manager_klass(
            self._register_component,
            self._unregister_component,
            self._job_heartbeat,
            self._job_complete,
            manager_public_ip,
            HEARTBEAT_PERIOD_SEC,
        )
        self._lifecycle_manager_config = lifecycle_manager_config

        self._shutdown_event = Event()
        self._current_status = 'running'

        self._register_controller()

    def _register_controller(self):
        """ Builds the Flask controller application, whose /status route returns the current
            status string of this server, and stores it for _initialize() to serve.
        """
        controller_app = Flask('controller')
        server = self

        @controller_app.route('/status')
        def status():
            return server._current_status

        self._controller_app = controller_app

    def run(self, host, ssl=None):
        """ Initializes the lifecycle manager, schedules server start-up on the event loop and
            runs the loop until it is stopped or a KeyboardInterrupt is received.

            host and ssl are forwarded to both the controller and the websocket server. This
            call blocks; the event loop is closed before it returns.
        """
        logger.debug('Initializing the lifecycle manager')
        self._lifecycle_manager.initialize(self._lifecycle_manager_config)

        logger.debug('Initializing all members of the event loop')
        loop = trollius.get_event_loop()
        trollius.Task(self._initialize(loop, host, ssl))

        logger.debug('Starting server on port %s, with controller on port %s', WEBSOCKET_PORT,
                     CONTROLLER_PORT)
        try:
            loop.run_forever()
        except KeyboardInterrupt:
            pass
        finally:
            loop.close()

    def close(self):
        """ Requests shutdown: marks the server as shutting down (which stops the work checker
            loop), shuts down the lifecycle manager and blocks until no jobs remain in flight.
        """
        logger.debug('Requested server shutdown')
        self._current_status = 'shutting_down'
        self._lifecycle_manager.shutdown()

        # The shutdown event is otherwise only set from _job_complete(). With no jobs in flight
        # that callback never fires, so without this check the wait below would never return.
        if not self._job_count:
            self._shutdown_event.set()

        self._shutdown_event.wait()
        logger.debug('Shutting down server')

    def _register_component(self, realm, component_klass, **kwargs):
        """ Registers a component with the server. The component_klass must derive from
            BaseComponent.

            The component is constructed for the given realm (extra keyword arguments are passed
            through to its constructor), given references to this server, the lifecycle manager,
            the build logs, the user files and the registry hostname, tracked by the server and
            added to the WAMP session factory. Returns the new component.
        """
        logger.debug('Registering component with realm %s', realm)

        component = component_klass(types.ComponentConfig(realm=realm), realm=realm, **kwargs)
        component.server = self
        component.parent_manager = self._lifecycle_manager
        component.build_logs = self._build_logs
        component.user_files = self._user_files
        component.registry_hostname = self._registry_hostname

        self._current_components.append(component)
        self._session_factory.add(component)
        return component

    def _unregister_component(self, component):
        """ Stops tracking the given component and removes it from the WAMP session factory.

            Raises ValueError (from list.remove) if the component is not currently tracked.
        """
        logger.debug('Unregistering component with realm %s and token %s',
                     component.builder_realm, component.expected_token)

        self._current_components.remove(component)
        self._session_factory.remove(component)

    def _job_heartbeat(self, build_job):
        """ Extends the processing time of the queue item backing the given build job by
            JOB_TIMEOUT_SECONDS, with a minimum extension of MINIMUM_JOB_EXTENSION.
        """
        # NOTE(review): this is invoked on the WorkQueue class rather than on self._queue, which
        # only works if extend_processing does not need an instance - confirm in data.queue.
        WorkQueue.extend_processing(build_job.job_item, seconds_from_now=JOB_TIMEOUT_SECONDS,
                                    retry_count=1, minimum_extension=MINIMUM_JOB_EXTENSION)

    def _job_complete(self, build_job, job_status):
        """ Reports the outcome of a build job to the queue and updates the in-flight count.

            INCOMPLETE jobs are returned to the queue with their retry restored and a 30 second
            retry delay; ERROR jobs are returned without restoring the retry; any other status
            marks the queue item as complete. If the server is shutting down and this was the
            last job in flight, the shutdown event is set so that close() can return.
        """
        if job_status == BuildJobResult.INCOMPLETE:
            self._queue.incomplete(build_job.job_item, restore_retry=True, retry_after=30)
        elif job_status == BuildJobResult.ERROR:
            self._queue.incomplete(build_job.job_item, restore_retry=False)
        else:
            self._queue.complete(build_job.job_item)

        self._job_count = self._job_count - 1

        if self._current_status == 'shutting_down' and not self._job_count:
            self._shutdown_event.set()

        # TODO(jschorr): check for work here?

    @trollius.coroutine
    def _work_checker(self):
        """ Coroutine which polls the queue for build jobs while the server is running.

            When the queue is empty the coroutine sleeps for WORK_CHECK_TIMEOUT seconds. An item
            which cannot be loaded as a BuildJob is logged, marked incomplete without restoring
            its retry, and skipped. A loaded job is offered to the lifecycle manager: if it was
            scheduled the in-flight count is incremented, otherwise the item is requeued for
            immediate retry. The coroutine then sleeps for WORK_CHECK_TIMEOUT seconds.
        """
        while self._current_status == 'running':
            logger.debug('Checking for more work for %d active workers',
                         self._lifecycle_manager.num_workers())
            job_item = self._queue.get(processing_time=self._lifecycle_manager.setup_time())
            if job_item is None:
                logger.debug('No additional work found. Going to sleep for %s seconds',
                             WORK_CHECK_TIMEOUT)
                yield From(trollius.sleep(WORK_CHECK_TIMEOUT))
                continue

            try:
                build_job = BuildJob(job_item)
            except BuildJobLoadException as irbe:
                logger.exception(irbe)
                self._queue.incomplete(job_item, restore_retry=False)
                # Nothing can be scheduled for this item. Falling through would use a
                # build_job that is either unbound or left over from a previous iteration.
                continue

            logger.debug('Build job found. Checking for an avaliable worker.')
            scheduled = yield From(self._lifecycle_manager.schedule(build_job, self._loop))
            if scheduled:
                self._job_count = self._job_count + 1
                logger.debug('Build job scheduled. Running: %s', self._job_count)
            else:
                logger.debug('All workers are busy. Requeuing.')
                self._queue.incomplete(job_item, restore_retry=True, retry_after=0)

            yield From(trollius.sleep(WORK_CHECK_TIMEOUT))

    @trollius.coroutine
    def _initialize(self, loop, host, ssl=None):
        """ Coroutine which starts the controller and WAMP servers on the given loop and host,
            then runs the work checker until the server stops running.
        """
        self._loop = loop

        # Create the WAMP server.
        transport_factory = WampWebSocketServerFactory(self._session_factory, debug_wamp=False)
        transport_factory.setProtocolOptions(failByDrop=True)

        # Initialize the controller server and the WAMP server
        create_wsgi_server(self._controller_app, loop=loop, host=host, port=CONTROLLER_PORT,
                           ssl=ssl)
        yield From(loop.create_server(transport_factory, host, WEBSOCKET_PORT, ssl=ssl))

        # Initialize the work queue checker.
        yield From(self._work_checker())
|