Merge pull request #11 from coreos-inc/nimbus

CloudWatch for build job status
This commit is contained in:
Jimmy Zelinskie 2015-02-18 17:17:28 -05:00
commit 47f8cb77c4
8 changed files with 174 additions and 86 deletions

View file

@ -9,14 +9,15 @@ from autobahn.wamp import types
from aiowsgi import create_server as create_wsgi_server
from flask import Flask
from threading import Event
from trollius.tasks import Task
from trollius.coroutines import From
from datetime import timedelta
from buildman.enums import BuildJobResult, BuildServerStatus
from buildman.jobutil.buildstatus import StatusHandler
from buildman.jobutil.buildjob import BuildJob, BuildJobLoadException
from data import database
from data.queue import WorkQueue
from app import app
from app import app, build_metrics
logger = logging.getLogger(__name__)
@ -27,12 +28,6 @@ MINIMUM_JOB_EXTENSION = timedelta(minutes=2)
HEARTBEAT_PERIOD_SEC = 30
class BuildJobResult(object):
    """ Enum-style holder for the possible outcomes of a build job. """
    # NOTE(review): a `BuildJobResult` is also imported from `buildman.enums`
    # earlier in this listing; this looks like the pre-change copy shown by a
    # diff view -- confirm which definition is meant to be live.
    INCOMPLETE, COMPLETE, ERROR = 'incomplete', 'complete', 'error'
class BuilderServer(object):
""" Server which handles both HTTP and WAMP requests, managing the full state of the build
controller.
@ -40,7 +35,7 @@ class BuilderServer(object):
def __init__(self, registry_hostname, queue, build_logs, user_files, lifecycle_manager_klass,
lifecycle_manager_config, manager_hostname):
self._loop = None
self._current_status = 'starting'
self._current_status = BuildServerStatus.STARTING
self._current_components = []
self._job_count = 0
@ -60,7 +55,7 @@ class BuilderServer(object):
self._lifecycle_manager_config = lifecycle_manager_config
self._shutdown_event = Event()
self._current_status = 'running'
self._current_status = BuildServerStatus.RUNNING
self._register_controller()
@ -97,8 +92,14 @@ class BuilderServer(object):
logger.debug('Starting server on port %s, with controller on port %s', websocket_port,
controller_port)
TASKS = [
Task(self._initialize(loop, host, websocket_port, controller_port, ssl)),
Task(self._queue_metrics_updater()),
]
try:
loop.run_until_complete(self._initialize(loop, host, websocket_port, controller_port, ssl))
loop.run_until_complete(trollius.wait(TASKS))
except KeyboardInterrupt:
pass
finally:
@ -106,7 +107,7 @@ class BuilderServer(object):
def close(self):
logger.debug('Requested server shutdown')
self._current_status = 'shutting_down'
self._current_status = BuildServerStatus.SHUTDOWN
self._lifecycle_manager.shutdown()
self._shutdown_event.wait()
logger.debug('Shutting down server')
@ -147,12 +148,14 @@ class BuilderServer(object):
self._job_count = self._job_count - 1
if self._current_status == 'shutting_down' and not self._job_count:
if self._current_status == BuildServerStatus.SHUTDOWN and not self._job_count:
self._shutdown_event.set()
build_metrics.report_completion_status(job_status)
@trollius.coroutine
def _work_checker(self):
while self._current_status == 'running':
while self._current_status == BuildServerStatus.RUNNING:
with database.CloseForLongOperation(app.config):
yield From(trollius.sleep(WORK_CHECK_TIMEOUT))
@ -183,7 +186,11 @@ class BuilderServer(object):
logger.debug('All workers are busy. Requeuing.')
self._queue.incomplete(job_item, restore_retry=True, retry_after=0)
@trollius.coroutine
def _queue_metrics_updater(self):
    """ Coroutine that repeatedly asks the work queue to update its metrics.

        Keeps looping for as long as the server status equals
        BuildServerStatus.RUNNING. Each pass sleeps for 30 seconds and then
        calls `self._queue.update_metrics()`.
    """
    while True:
        # The status is only examined at the top of each pass, so an update
        # still runs once after a sleep during which the status changed.
        if not (self._current_status == BuildServerStatus.RUNNING):
            break

        yield From(trollius.sleep(30))
        self._queue.update_metrics()
@trollius.coroutine
def _initialize(self, loop, host, websocket_port, controller_port, ssl=None):