Merge pull request #1830 from coreos-inc/superuser-dashboard
Add prometheus stats to enable better dashboarding
This commit is contained in:
commit
ad4efba802
18 changed files with 128 additions and 199 deletions
|
@ -72,6 +72,11 @@ class BuildJob(object):
|
|||
""" Returns the namespace under which this build is running. """
|
||||
return self.repo_build.repository.namespace_user.username
|
||||
|
||||
@property
|
||||
def repo_name(self):
|
||||
""" Returns the name of the repository under which this build is running. """
|
||||
return self.repo_build.repository.name
|
||||
|
||||
@property
|
||||
def repo_build(self):
|
||||
return self._load_repo_build()
|
||||
|
|
|
@ -182,6 +182,7 @@ class EphemeralBuilderManager(BaseManager):
|
|||
self._build_uuid_to_info.pop(build_job.build_uuid, None)
|
||||
raise Return()
|
||||
|
||||
executor_name = build_info.executor_name
|
||||
execution_id = build_info.execution_id
|
||||
|
||||
# If we have not yet received a heartbeat, then the node failed to boot in some way. We mark
|
||||
|
@ -196,7 +197,7 @@ class EphemeralBuilderManager(BaseManager):
|
|||
execution_id))
|
||||
if got_lock:
|
||||
logger.warning('Marking job %s as incomplete', build_job.build_uuid)
|
||||
self.job_complete_callback(build_job, BuildJobResult.INCOMPLETE)
|
||||
self.job_complete_callback(build_job, BuildJobResult.INCOMPLETE, executor_name)
|
||||
|
||||
# Finally, we terminate the build execution for the job. We don't do this under a lock as
|
||||
# terminating a node is an atomic operation; better to make sure it is terminated than not.
|
||||
|
@ -550,7 +551,10 @@ class EphemeralBuilderManager(BaseManager):
|
|||
build_job.build_uuid, job_status)
|
||||
|
||||
# Mark the job as completed.
|
||||
self.job_complete_callback(build_job, job_status)
|
||||
build_info = self._build_uuid_to_info.get(build_job.build_uuid, None)
|
||||
executor_name = build_info.executor_name if build_info else None
|
||||
|
||||
self.job_complete_callback(build_job, job_status, executor_name)
|
||||
|
||||
# Kill the ephemeral builder.
|
||||
yield From(self.kill_builder_executor(build_job.build_uuid))
|
||||
|
|
|
@ -141,7 +141,7 @@ class BuilderServer(object):
|
|||
self._queue.extend_processing(build_job.job_item, seconds_from_now=JOB_TIMEOUT_SECONDS,
|
||||
minimum_extension=MINIMUM_JOB_EXTENSION)
|
||||
|
||||
def _job_complete(self, build_job, job_status):
|
||||
def _job_complete(self, build_job, job_status, executor_name=None):
|
||||
if job_status == BuildJobResult.INCOMPLETE:
|
||||
self._queue.incomplete(build_job.job_item, restore_retry=False, retry_after=30)
|
||||
else:
|
||||
|
@ -152,7 +152,7 @@ class BuilderServer(object):
|
|||
if self._current_status == BuildServerStatus.SHUTDOWN and not self._job_count:
|
||||
self._shutdown_event.set()
|
||||
|
||||
report_completion_status(job_status)
|
||||
_report_completion_status(build_job, job_status, executor_name)
|
||||
|
||||
@trollius.coroutine
|
||||
def _work_checker(self):
|
||||
|
@ -229,7 +229,10 @@ class BuilderServer(object):
|
|||
# Initialize the work queue checker.
|
||||
yield From(self._work_checker())
|
||||
|
||||
def report_completion_status(status):
|
||||
def _report_completion_status(build_job, status, executor_name):
|
||||
metric_queue.build_counter.Inc(labelvalues=[status])
|
||||
metric_queue.repository_build_completed.Inc(labelvalues=[build_job.namespace, build_job.repo_name,
|
||||
status, executor_name or 'executor'])
|
||||
if status == BuildJobResult.COMPLETE:
|
||||
status_name = 'CompleteBuilds'
|
||||
elif status == BuildJobResult.ERROR:
|
||||
|
@ -240,4 +243,3 @@ def report_completion_status(status):
|
|||
return
|
||||
|
||||
metric_queue.put_deprecated(status_name, 1, unit='Count')
|
||||
metric_queue.build_counter.Inc(labelvalues=[status_name])
|
||||
|
|
Reference in a new issue