Merge pull request #11 from coreos-inc/nimbus
CloudWatch for build job status
This commit is contained in:
commit
47f8cb77c4
8 changed files with 174 additions and 86 deletions
12
buildman/enums.py
Normal file
12
buildman/enums.py
Normal file
|
@ -0,0 +1,12 @@
|
|||
class BuildJobResult(object):
    """
    Enumeration of the possible outcomes of a build job.

    Each member is a plain string constant.
    """
    # The job ended with an error.
    ERROR = 'error'

    # The job ran to completion.
    COMPLETE = 'complete'

    # The job did not finish.
    INCOMPLETE = 'incomplete'
|
||||
|
||||
|
||||
class BuildServerStatus(object):
    """
    Enumeration of the states a build server can be in.

    Each member is a plain string constant.
    """
    # The server has been created but is not yet running.
    STARTING = 'starting'

    # The server is running.
    RUNNING = 'running'

    # A shutdown has been requested.
    SHUTDOWN = 'shutting_down'
|
72
buildman/jobutil/buildreporter.py
Normal file
72
buildman/jobutil/buildreporter.py
Normal file
|
@ -0,0 +1,72 @@
|
|||
from trollius import From
|
||||
|
||||
from buildman.enums import BuildJobResult
|
||||
from util.cloudwatch import get_queue
|
||||
|
||||
|
||||
class BuildReporter(object):
    """
    Abstract interface for objects that report build statuses to a metrics service.

    Subclasses must override report_completion_status.
    """

    def report_completion_status(self, status):
        """
        Record the completion status of a build with the metrics service.

        The base implementation always raises NotImplementedError.
        """
        raise NotImplementedError()
|
||||
|
||||
|
||||
class NullReporter(BuildReporter):
    """
    A BuildReporter that discards everything it is given (the /dev/null of reporters).
    """

    def report_completion_status(self, *args):
        """ Accept any positional arguments and do nothing. """
        return None
|
||||
|
||||
|
||||
class CloudWatchBuildReporter(BuildReporter):
    """
    BuildReporter implementation targeting Amazon's CloudWatch.

    Reports are not sent directly: each one is placed on the queue supplied at construction
    time as an (args, kwargs) tuple.
    """

    def __init__(self, queue, namespace_name, completed_name, failed_name, incompleted_name):
        """
        Store the outgoing queue, the metric namespace and the metric name used for each of
        the three build results (complete, error, incomplete).
        """
        self._queue = queue
        self._namespace_name = namespace_name
        self._incompleted_name = incompleted_name
        self._failed_name = failed_name
        self._completed_name = completed_name

    def _send_to_queue(self, *args, **kwargs):
        """ Enqueue the call's arguments as a single (args, kwargs) tuple. """
        payload = (args, kwargs)
        self._queue.put(payload)

    def report_completion_status(self, status):
        """
        Enqueue a count of 1 for the metric matching the given BuildJobResult value.

        A status that matches none of the known results is ignored.
        """
        # Checked in the same order, and with the same equality test, as an if/elif chain.
        metric_names = (
            (BuildJobResult.COMPLETE, self._completed_name),
            (BuildJobResult.ERROR, self._failed_name),
            (BuildJobResult.INCOMPLETE, self._incompleted_name),
        )

        for result, metric_name in metric_names:
            if status == result:
                self._send_to_queue(self._namespace_name, metric_name, 1, unit='Count')
                return
|
||||
|
||||
|
||||
class BuildMetrics(object):
    """
    Chooses and wraps the reporter used to record the status of build completions.

    The reporter defaults to a NullReporter. When an app is given and its config sets
    BUILD_METRICS_TYPE to 'CloudWatch', a CloudWatchBuildReporter is created from the
    BUILD_METRICS_* config entries and the queue returned by get_queue(app).

    Attributes that are not found on this object are looked up on the reporter.
    """

    def __init__(self, app=None):
        """
        Build the reporter for the given app.

        Raises KeyError if the CloudWatch reporter is selected and any of the
        BUILD_METRICS_NAMESPACE, BUILD_METRICS_COMPLETED_NAME, BUILD_METRICS_FAILED_NAME or
        BUILD_METRICS_INCOMPLETED_NAME config entries is missing.
        """
        self._app = app
        self._reporter = NullReporter()

        if app is not None:
            reporter_type = app.config.get('BUILD_METRICS_TYPE', 'Null')
            if reporter_type == 'CloudWatch':
                namespace = app.config['BUILD_METRICS_NAMESPACE']
                completed_name = app.config['BUILD_METRICS_COMPLETED_NAME']
                failed_name = app.config['BUILD_METRICS_FAILED_NAME']
                incompleted_name = app.config['BUILD_METRICS_INCOMPLETED_NAME']

                request_queue = get_queue(app)
                self._reporter = CloudWatchBuildReporter(request_queue, namespace, completed_name,
                                                         failed_name, incompleted_name)

    def __getattr__(self, name):
        """
        Forward the lookup of a missing attribute to the reporter.

        Returns None when the reporter does not define the attribute. Raises AttributeError
        when this object has no reporter yet.
        """
        # __getattr__ only runs after normal lookup has failed. Reading self._reporter here
        # would re-enter this method forever when '_reporter' itself is missing, e.g. on an
        # instance created through __new__ without __init__ having run. Read the instance
        # dict directly instead.
        try:
            reporter = self.__dict__['_reporter']
        except KeyError:
            raise AttributeError(name)

        return getattr(reporter, name, None)
|
|
@ -9,14 +9,15 @@ from autobahn.wamp import types
|
|||
from aiowsgi import create_server as create_wsgi_server
|
||||
from flask import Flask
|
||||
from threading import Event
|
||||
from trollius.tasks import Task
|
||||
from trollius.coroutines import From
|
||||
from datetime import timedelta
|
||||
|
||||
from buildman.enums import BuildJobResult, BuildServerStatus
|
||||
from buildman.jobutil.buildstatus import StatusHandler
|
||||
from buildman.jobutil.buildjob import BuildJob, BuildJobLoadException
|
||||
from data import database
|
||||
from data.queue import WorkQueue
|
||||
from app import app
|
||||
from app import app, build_metrics
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
@ -27,12 +28,6 @@ MINIMUM_JOB_EXTENSION = timedelta(minutes=2)
|
|||
|
||||
HEARTBEAT_PERIOD_SEC = 30
|
||||
|
||||
class BuildJobResult(object):
|
||||
""" Build job result enum """
|
||||
INCOMPLETE = 'incomplete'
|
||||
COMPLETE = 'complete'
|
||||
ERROR = 'error'
|
||||
|
||||
class BuilderServer(object):
|
||||
""" Server which handles both HTTP and WAMP requests, managing the full state of the build
|
||||
controller.
|
||||
|
@ -40,7 +35,7 @@ class BuilderServer(object):
|
|||
def __init__(self, registry_hostname, queue, build_logs, user_files, lifecycle_manager_klass,
|
||||
lifecycle_manager_config, manager_hostname):
|
||||
self._loop = None
|
||||
self._current_status = 'starting'
|
||||
self._current_status = BuildServerStatus.STARTING
|
||||
self._current_components = []
|
||||
self._job_count = 0
|
||||
|
||||
|
@ -60,7 +55,7 @@ class BuilderServer(object):
|
|||
self._lifecycle_manager_config = lifecycle_manager_config
|
||||
|
||||
self._shutdown_event = Event()
|
||||
self._current_status = 'running'
|
||||
self._current_status = BuildServerStatus.RUNNING
|
||||
|
||||
self._register_controller()
|
||||
|
||||
|
@ -97,8 +92,14 @@ class BuilderServer(object):
|
|||
|
||||
logger.debug('Starting server on port %s, with controller on port %s', websocket_port,
|
||||
controller_port)
|
||||
|
||||
TASKS = [
|
||||
Task(self._initialize(loop, host, websocket_port, controller_port, ssl)),
|
||||
Task(self._queue_metrics_updater()),
|
||||
]
|
||||
|
||||
try:
|
||||
loop.run_until_complete(self._initialize(loop, host, websocket_port, controller_port, ssl))
|
||||
loop.run_until_complete(trollius.wait(TASKS))
|
||||
except KeyboardInterrupt:
|
||||
pass
|
||||
finally:
|
||||
|
@ -106,7 +107,7 @@ class BuilderServer(object):
|
|||
|
||||
def close(self):
|
||||
logger.debug('Requested server shutdown')
|
||||
self._current_status = 'shutting_down'
|
||||
self._current_status = BuildServerStatus.SHUTDOWN
|
||||
self._lifecycle_manager.shutdown()
|
||||
self._shutdown_event.wait()
|
||||
logger.debug('Shutting down server')
|
||||
|
@ -147,12 +148,14 @@ class BuilderServer(object):
|
|||
|
||||
self._job_count = self._job_count - 1
|
||||
|
||||
if self._current_status == 'shutting_down' and not self._job_count:
|
||||
if self._current_status == BuildServerStatus.SHUTDOWN and not self._job_count:
|
||||
self._shutdown_event.set()
|
||||
|
||||
build_metrics.report_completion_status(job_status)
|
||||
|
||||
@trollius.coroutine
|
||||
def _work_checker(self):
|
||||
while self._current_status == 'running':
|
||||
while self._current_status == BuildServerStatus.RUNNING:
|
||||
with database.CloseForLongOperation(app.config):
|
||||
yield From(trollius.sleep(WORK_CHECK_TIMEOUT))
|
||||
|
||||
|
@ -183,7 +186,11 @@ class BuilderServer(object):
|
|||
logger.debug('All workers are busy. Requeuing.')
|
||||
self._queue.incomplete(job_item, restore_retry=True, retry_after=0)
|
||||
|
||||
|
||||
@trollius.coroutine
def _queue_metrics_updater(self):
    """
    Coroutine that calls self._queue.update_metrics() after every 30 second sleep.

    The loop ends once the server status is no longer BuildServerStatus.RUNNING; the status
    is checked before each sleep.
    """
    while True:
        if self._current_status != BuildServerStatus.RUNNING:
            break

        yield From(trollius.sleep(30))
        self._queue.update_metrics()
|
||||
|
||||
@trollius.coroutine
|
||||
def _initialize(self, loop, host, websocket_port, controller_port, ssl=None):
|
||||
|
|
Reference in a new issue