This repository has been archived on 2020-03-24. You can view files and clone it, but cannot push or open issues or pull requests.
quay/buildman/server.py

178 lines
5.9 KiB
Python
Raw Normal View History

import logging
import trollius
from autobahn.asyncio.wamp import RouterFactory, RouterSessionFactory
2014-11-18 20:45:56 +00:00
from autobahn.asyncio.websocket import WampWebSocketServerFactory
from autobahn.wamp import types
from aiowsgi import create_server as create_wsgi_server
from flask import Flask
2014-11-18 20:45:56 +00:00
from threading import Event
from trollius.coroutines import From
from datetime import datetime, timedelta
from buildman.jobutil.buildjob import BuildJob, BuildJobLoadException
from data.queue import WorkQueue
logger = logging.getLogger(__name__)
WORK_CHECK_TIMEOUT = 10
TIMEOUT_PERIOD_MINUTES = 20
JOB_TIMEOUT_SECONDS = 300
MINIMUM_JOB_EXTENSION = timedelta(minutes=2)
WEBSOCKET_PORT = 8787
CONTROLLER_PORT = 8686
2014-11-18 20:45:56 +00:00
class BuildJobResult(object):
""" Build job result enum """
INCOMPLETE = 'incomplete'
COMPLETE = 'complete'
ERROR = 'error'
class BuilderServer(object):
""" Server which handles both HTTP and WAMP requests, managing the full state of the build
controller.
"""
def __init__(self, server_hostname, queue, build_logs, user_files, lifecycle_manager_klass):
2014-11-18 20:45:56 +00:00
self._loop = None
self._current_status = 'starting'
self._current_components = []
self._job_count = 0
2014-11-18 20:45:56 +00:00
self._session_factory = RouterSessionFactory(RouterFactory())
self._server_hostname = server_hostname
self._queue = queue
self._build_logs = build_logs
self._user_files = user_files
self._lifecycle_manager = lifecycle_manager_klass(
2014-11-18 20:45:56 +00:00
self._register_component,
self._unregister_component,
self._job_heartbeat,
2014-11-18 20:45:56 +00:00
self._job_complete
)
self._shutdown_event = Event()
self._current_status = 'running'
self._register_controller()
def _register_controller(self):
controller_app = Flask('controller')
server = self
@controller_app.route('/status')
def status():
return server._current_status
self._controller_app = controller_app
def run(self, host, ssl=None):
logger.debug('Initializing the lifecycle manager')
self._lifecycle_manager.initialize()
logger.debug('Initializing all members of the event loop')
loop = trollius.get_event_loop()
trollius.Task(self._initialize(loop, host, ssl))
logger.debug('Starting server on port %s, with controller on port %s', WEBSOCKET_PORT,
CONTROLLER_PORT)
try:
2014-11-18 20:45:56 +00:00
loop.run_forever()
except KeyboardInterrupt:
2014-11-18 20:45:56 +00:00
pass
finally:
2014-11-18 20:45:56 +00:00
loop.close()
def close(self):
logger.debug('Requested server shutdown')
self._current_status = 'shutting_down'
self._lifecycle_manager.shutdown()
self._shutdown_event.wait()
logger.debug('Shutting down server')
def _register_component(self, realm, component_klass, **kwargs):
""" Registers a component with the server. The component_klass must derive from
BaseComponent.
"""
logger.debug('Registering component with realm %s', realm)
2014-11-18 20:45:56 +00:00
component = component_klass(types.ComponentConfig(realm=realm), realm=realm, **kwargs)
component.server = self
component.parent_manager = self._lifecycle_manager
component.build_logs = self._build_logs
component.user_files = self._user_files
component.server_hostname = self._server_hostname
self._current_components.append(component)
self._session_factory.add(component)
return component
def _unregister_component(self, component):
logger.debug('Unregistering component with realm %s and token %s',
2014-11-18 20:45:56 +00:00
component.builder_realm, component.expected_token)
self._current_components.remove(component)
self._session_factory.remove(component)
def _job_heartbeat(self, build_job):
WorkQueue.extend_processing(build_job.job_item(), seconds_from_now=JOB_TIMEOUT_SECONDS,
retry_count=1, minimum_extension=MINIMUM_JOB_EXTENSION)
def _job_complete(self, build_job, job_status):
2014-11-18 20:45:56 +00:00
if job_status == BuildJobResult.INCOMPLETE:
self._queue.incomplete(build_job.job_item(), restore_retry=True, retry_after=30)
2014-11-18 20:45:56 +00:00
elif job_status == BuildJobResult.ERROR:
self._queue.incomplete(build_job.job_item(), restore_retry=False)
else:
self._queue.complete(build_job.job_item())
self._job_count = self._job_count - 1
if self._current_status == 'shutting_down' and not self._job_count:
self._shutdown_event.set()
# TODO(jschorr): check for work here?
@trollius.coroutine
def _work_checker(self):
while self._current_status == 'running':
logger.debug('Checking for more work')
job_item = self._queue.get(processing_time=self._lifecycle_manager.setup_time())
if job_item is None:
logger.debug('No additional work found. Going to sleep for %s seconds', WORK_CHECK_TIMEOUT)
yield From(trollius.sleep(WORK_CHECK_TIMEOUT))
continue
try:
build_job = BuildJob(job_item)
except BuildJobLoadException as irbe:
logger.exception(irbe)
self._queue.incomplete(job_item, restore_retry=False)
logger.debug('Build job found. Checking for an avaliable worker.')
if self._lifecycle_manager.schedule(build_job, self._loop):
self._job_count = self._job_count + 1
logger.debug('Build job scheduled. Running: %s', self._job_count)
else:
logger.debug('All workers are busy. Requeuing.')
self._queue.incomplete(job_item, restore_retry=True, retry_after=0)
2014-11-18 20:45:56 +00:00
yield From(trollius.sleep(WORK_CHECK_TIMEOUT))
@trollius.coroutine
def _initialize(self, loop, host, ssl=None):
self._loop = loop
2014-11-18 20:45:56 +00:00
# Create the WAMP server.
2014-11-18 21:35:03 +00:00
transport_factory = WampWebSocketServerFactory(self._session_factory, debug_wamp=False)
2014-11-18 20:45:56 +00:00
transport_factory.setProtocolOptions(failByDrop=True)
# Initialize the controller server and the WAMP server
create_wsgi_server(self._controller_app, loop=loop, host=host, port=CONTROLLER_PORT, ssl=ssl)
yield From(loop.create_server(transport_factory, host, WEBSOCKET_PORT, ssl=ssl))
# Initialize the work queue checker.
2014-11-18 21:34:09 +00:00
yield From(self._work_checker())